From 5d8d03ff2086256aa2977e5ec2ecdc048154dc1f Mon Sep 17 00:00:00 2001 From: lokeshrangineni <19699092+lokeshrangineni@users.noreply.github.com> Date: Fri, 8 Nov 2024 02:25:24 -0500 Subject: [PATCH] feat: Adding TLS support for offline server. (#4744) * * Adding TLS support for offline server. * Added test cases for the TLS offline server by creating RemoteOfflineTlsStoreDataSourceCreator Signed-off-by: lrangine <19699092+lokeshrangineni@users.noreply.github.com> * * Fixing the lint error and also integration tests. Signed-off-by: lrangine <19699092+lokeshrangineni@users.noreply.github.com> * * Added documentation for the offline server and moved to how to guide. * Fixing the issue with integration test. Signed-off-by: lrangine <19699092+lokeshrangineni@users.noreply.github.com> * * Added documentation for the offline server and moved to how to guide. * Fixing the issue with integration test. Signed-off-by: lrangine <19699092+lokeshrangineni@users.noreply.github.com> * * fixing the integration test by adding extra flag verify_client Signed-off-by: lrangine <19699092+lokeshrangineni@users.noreply.github.com> * * Adding alias names for the host in self-signed certificate. 
Signed-off-by: lrangine <19699092+lokeshrangineni@users.noreply.github.com> --------- Signed-off-by: lrangine <19699092+lokeshrangineni@users.noreply.github.com> --- docs/SUMMARY.md | 1 + .../starting-feast-servers-tls-mode.md | 76 +++++++++++++++++-- sdk/python/feast/cli.py | 36 ++++++++- sdk/python/feast/feature_store.py | 13 +++- .../feast/infra/offline_stores/remote.py | 71 ++++++++++++++--- sdk/python/feast/offline_server.py | 51 +++++++++++-- .../feature_repos/repo_configuration.py | 2 + .../universal/data_sources/file.py | 73 ++++++++++++++++-- .../test_universal_historical_retrieval.py | 2 + .../generate_self_signed_certifcate_util.py | 14 +++- 10 files changed, 303 insertions(+), 36 deletions(-) rename docs/{reference => how-to-guides}/starting-feast-servers-tls-mode.md (64%) diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 325b967353..99dd1f0894 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -64,6 +64,7 @@ * [Adding a new online store](how-to-guides/customizing-feast/adding-support-for-a-new-online-store.md) * [Adding a custom provider](how-to-guides/customizing-feast/creating-a-custom-provider.md) * [Adding or reusing tests](how-to-guides/adding-or-reusing-tests.md) +* [Starting Feast servers in TLS(SSL) Mode](how-to-guides/starting-feast-servers-tls-mode.md) ## Reference diff --git a/docs/reference/starting-feast-servers-tls-mode.md b/docs/how-to-guides/starting-feast-servers-tls-mode.md similarity index 64% rename from docs/reference/starting-feast-servers-tls-mode.md rename to docs/how-to-guides/starting-feast-servers-tls-mode.md index 366cd79d56..e1ddbc08be 100644 --- a/docs/reference/starting-feast-servers-tls-mode.md +++ b/docs/how-to-guides/starting-feast-servers-tls-mode.md @@ -1,7 +1,9 @@ # Starting feast servers in TLS (SSL) mode. TLS (Transport Layer Security) and SSL (Secure Sockets Layer) are both protocols encrypts communications between a client and server to provide enhanced security.TLS or SSL words used interchangeably. 
This article is going to show the sample code to start all the feast servers such as online server, offline server, registry server and UI server in TLS mode. -Also show examples related to feast clients to communicate with the feast servers started in TLS mode. +Also show examples related to feast clients to communicate with the feast servers started in TLS mode. + +We assume you have a basic understanding of feast terminology before going through this tutorial. If you are new to feast, we recommend going through the existing [starter tutorials](./../../examples) of feast first. ## Obtaining a self-signed TLS certificate and key In development mode we can generate a self-signed certificate for testing. In an actual production environment it is always recommended to get it from a trusted TLS certificate provider. @@ -17,15 +19,32 @@ The above command will generate two files You can use the public or private keys generated from above command in the rest of the sections in this tutorial. ## Create the feast demo repo for the rest of the sections. -create a feast repo using `feast init` command and use this repo as a demo for subsequent sections. +Create a feast repo and initialize it using the `feast init` and `feast apply` commands, and use this repo as a demo for subsequent sections. ```shell feast init feast_repo_ssl_demo -``` -Output is -``` +#output will be something similar as below Creating a new Feast repository in /Documents/Src/feast/feast_repo_ssl_demo.
+ +cd feast_repo_ssl_demo/feature_repo +feast apply + +#output will be something similar as below +Applying changes for project feast_repo_ssl_demo + +Created project feast_repo_ssl_demo +Created entity driver +Created feature view driver_hourly_stats +Created feature view driver_hourly_stats_fresh +Created on demand feature view transformed_conv_rate +Created on demand feature view transformed_conv_rate_fresh +Created feature service driver_activity_v1 +Created feature service driver_activity_v3 +Created feature service driver_activity_v2 + +Created sqlite table feast_repo_ssl_demo_driver_hourly_stats_fresh +Created sqlite table feast_repo_ssl_demo_driver_hourly_stats ``` You need to execute the feast cli commands from `feast_repo_ssl_demo/feature_repo` directory created from the above `feast init` command. @@ -68,7 +87,7 @@ entity_key_serialization_version: 2 auth: type: no_auth ``` -{% endcode %} + `cert` is an optional configuration to the public certificate path when the online server starts in TLS(SSL) mode. Typically, this file ends with `*.crt`, `*.cer`, or `*.pem`. @@ -106,14 +125,55 @@ entity_key_serialization_version: 2 auth: type: no_auth ``` -{% endcode %} `cert` is an optional configuration to the public certificate path when the registry server starts in TLS(SSL) mode. Typically, this file ends with `*.crt`, `*.cer`, or `*.pem`. ## Starting feast offline server in TLS mode -TBD +To start the offline server in TLS mode, you need to provide the private and public keys using the `--key` and `--cert` arguments with the `feast serve_offline` command. + +```shell +feast serve_offline --key /path/to/key.pem --cert /path/to/cert.pem +``` +You will see output similar to the following. Note that the server URL uses the `grpc+tls` scheme. + +```shell +11/07/2024 11:10:01 AM feast.offline_server INFO: Found SSL certificates in the args so going to start offline server in TLS(SSL) mode.
+11/07/2024 11:10:01 AM feast.offline_server INFO: Offline store server serving at: grpc+tls://127.0.0.1:8815 +11/07/2024 11:10:01 AM feast.offline_server INFO: offline server starting with pid: [11606] +``` + +### Feast client connecting to remote offline server started in TLS mode. + +Sometimes you may need to pass the self-signed public key to connect to the remote offline server started in TLS (SSL) mode if you have not added the public key to the certificate store. +You also have to set `scheme` to `https`. + +feast client example: +```yaml +project: feast-project +registry: + registry_type: remote + path: https://localhost:6570 + cert: /path/to/cert.pem +provider: local +online_store: + path: http://localhost:6566 + type: remote + cert: /path/to/cert.pem +entity_key_serialization_version: 2 +offline_store: + type: remote + host: localhost + port: 8815 + scheme: https + cert: /path/to/cert.pem +auth: + type: no_auth +``` + +`cert` is an optional configuration to the public certificate path when the offline server starts in TLS(SSL) mode. Typically, this file ends with `*.crt`, `*.cer`, or `*.pem`. +`scheme` should be `https`. By default, it will be `http`, so you have to explicitly set it to `https` if you are planning to connect to a remote offline server that is started in TLS mode. ## Starting feast UI server (react app) in TLS mode To start the feast UI server in TLS mode, you need to provide the private and public keys using the `--key` and `--cert` arguments with the `feast ui` command. diff --git a/sdk/python/feast/cli.py b/sdk/python/feast/cli.py index 06db93d680..a02013b11f 100644 --- a/sdk/python/feast/cli.py +++ b/sdk/python/feast/cli.py @@ -1114,16 +1114,50 @@ def serve_registry_command( default=DEFAULT_OFFLINE_SERVER_PORT, help="Specify a port for the server", ) +@click.option( + "--key", + "-k", + "tls_key_path", + type=click.STRING, + default="", + show_default=False, + help="path to TLS certificate private key. 
You need to pass --cert as well to start server in TLS mode", +) +@click.option( + "--cert", + "-c", + "tls_cert_path", + type=click.STRING, + default="", + show_default=False, + help="path to TLS certificate public key. You need to pass --key as well to start server in TLS mode", +) +@click.option( + "--verify_client", + "-v", + "tls_verify_client", + type=click.BOOL, + default="True", + show_default=True, + help="Verify the client or not for the TLS client certificate.", +) @click.pass_context def serve_offline_command( ctx: click.Context, host: str, port: int, + tls_key_path: str, + tls_cert_path: str, + tls_verify_client: bool, ): """Start a remote server locally on a given host, port.""" + if (tls_key_path and not tls_cert_path) or (not tls_key_path and tls_cert_path): + raise click.BadParameter( + "Please pass --cert and --key args to start the offline server in TLS mode." + ) store = create_feature_store(ctx) - store.serve_offline(host, port) + store.serve_offline(host, port, tls_key_path, tls_cert_path, tls_verify_client) @cli.command("validate") diff --git a/sdk/python/feast/feature_store.py b/sdk/python/feast/feature_store.py index c37584609f..79a0d752ef 100644 --- a/sdk/python/feast/feature_store.py +++ b/sdk/python/feast/feature_store.py @@ -1958,11 +1958,20 @@ def serve_registry( self, port=port, tls_key_path=tls_key_path, tls_cert_path=tls_cert_path ) - def serve_offline(self, host: str, port: int) -> None: + def serve_offline( + self, + host: str, + port: int, + tls_key_path: str = "", + tls_cert_path: str = "", + tls_verify_client: bool = True, + ) -> None: """Start offline server locally on a given port.""" from feast import offline_server - offline_server.start_server(self, host, port) + offline_server.start_server( + self, host, port, tls_key_path, tls_cert_path, tls_verify_client + ) def serve_transformations(self, port: int) -> None: """Start the feature transformation server locally on a given port.""" diff --git 
a/sdk/python/feast/infra/offline_stores/remote.py b/sdk/python/feast/infra/offline_stores/remote.py index 7ee018ac6d..6f26e06c6b 100644 --- a/sdk/python/feast/infra/offline_stores/remote.py +++ b/sdk/python/feast/infra/offline_stores/remote.py @@ -70,22 +70,45 @@ def list_actions(self, options: FlightCallOptions = None): return super().list_actions(options) -def build_arrow_flight_client(host: str, port, auth_config: AuthConfig): +def build_arrow_flight_client( + scheme: str, host: str, port, auth_config: AuthConfig, cert: str = "" +): + arrow_scheme = "grpc+tcp" + if cert: + logger.info( + "Scheme is https so going to connect offline server in SSL(TLS) mode." + ) + arrow_scheme = "grpc+tls" + + kwargs = {} + if cert: + with open(cert, "rb") as root_certs: + kwargs["tls_root_certs"] = root_certs.read() + if auth_config.type != AuthType.NONE.value: middlewares = [FlightAuthInterceptorFactory(auth_config)] - return FeastFlightClient(f"grpc://{host}:{port}", middleware=middlewares) + return FeastFlightClient( + f"{arrow_scheme}://{host}:{port}", middleware=middlewares, **kwargs + ) - return FeastFlightClient(f"grpc://{host}:{port}") + return FeastFlightClient(f"{arrow_scheme}://{host}:{port}", **kwargs) class RemoteOfflineStoreConfig(FeastConfigBaseModel): type: Literal["remote"] = "remote" + + scheme: Literal["http", "https"] = "http" + host: StrictStr """ str: remote offline store server port, e.g. the host URL for offline store of arrow flight server. """ port: Optional[StrictInt] = None """ str: remote offline store server port.""" + cert: StrictStr = "" + """ str: Path to the public certificate when the offline server starts in TLS(SSL) mode. This may be needed if the offline server started with a self-signed certificate, typically this file ends with `*.crt`, `*.cer`, or `*.pem`. + If type is 'remote', then this configuration is needed to connect to remote offline server in TLS mode. 
""" + class RemoteRetrievalJob(RetrievalJob): def __init__( @@ -178,7 +201,11 @@ def get_historical_features( assert isinstance(config.offline_store, RemoteOfflineStoreConfig) client = build_arrow_flight_client( - config.offline_store.host, config.offline_store.port, config.auth_config + scheme=config.offline_store.scheme, + host=config.offline_store.host, + port=config.offline_store.port, + auth_config=config.auth_config, + cert=config.offline_store.cert, ) feature_view_names = [fv.name for fv in feature_views] @@ -214,7 +241,11 @@ def pull_all_from_table_or_query( # Initialize the client connection client = build_arrow_flight_client( - config.offline_store.host, config.offline_store.port, config.auth_config + scheme=config.offline_store.scheme, + host=config.offline_store.host, + port=config.offline_store.port, + auth_config=config.auth_config, + cert=config.offline_store.cert, ) api_parameters = { @@ -247,7 +278,11 @@ def pull_latest_from_table_or_query( # Initialize the client connection client = build_arrow_flight_client( - config.offline_store.host, config.offline_store.port, config.auth_config + config.offline_store.scheme, + config.offline_store.host, + config.offline_store.port, + config.auth_config, + cert=config.offline_store.cert, ) api_parameters = { @@ -282,7 +317,11 @@ def write_logged_features( # Initialize the client connection client = build_arrow_flight_client( - config.offline_store.host, config.offline_store.port, config.auth_config + config.offline_store.scheme, + config.offline_store.host, + config.offline_store.port, + config.auth_config, + config.offline_store.cert, ) api_parameters = { @@ -308,7 +347,11 @@ def offline_write_batch( # Initialize the client connection client = build_arrow_flight_client( - config.offline_store.host, config.offline_store.port, config.auth_config + config.offline_store.scheme, + config.offline_store.host, + config.offline_store.port, + config.auth_config, + config.offline_store.cert, ) feature_view_names = 
[feature_view.name] @@ -336,7 +379,11 @@ def validate_data_source( assert isinstance(config.offline_store, RemoteOfflineStoreConfig) client = build_arrow_flight_client( - config.offline_store.host, config.offline_store.port, config.auth_config + config.offline_store.scheme, + config.offline_store.host, + config.offline_store.port, + config.auth_config, + config.offline_store.cert, ) api_parameters = { @@ -357,7 +404,11 @@ def get_table_column_names_and_types_from_data_source( assert isinstance(config.offline_store, RemoteOfflineStoreConfig) client = build_arrow_flight_client( - config.offline_store.host, config.offline_store.port, config.auth_config + config.offline_store.scheme, + config.offline_store.host, + config.offline_store.port, + config.auth_config, + config.offline_store.cert, ) api_parameters = { diff --git a/sdk/python/feast/offline_server.py b/sdk/python/feast/offline_server.py index cec043129e..8774dea8ae 100644 --- a/sdk/python/feast/offline_server.py +++ b/sdk/python/feast/offline_server.py @@ -39,12 +39,22 @@ class OfflineServer(fl.FlightServerBase): - def __init__(self, store: FeatureStore, location: str, **kwargs): + def __init__( + self, + store: FeatureStore, + location: str, + host: str = "localhost", + tls_certificates: List = [], + verify_client=False, + **kwargs, + ): super(OfflineServer, self).__init__( - location, + location=location, middleware=self.arrow_flight_auth_middleware( str_to_auth_manager_type(store.config.auth_config.type) ), + tls_certificates=tls_certificates, + verify_client=verify_client, **kwargs, ) self._location = location @@ -52,6 +62,8 @@ def __init__(self, store: FeatureStore, location: str, **kwargs): self.flights: Dict[str, Any] = {} self.store = store self.offline_store = get_offline_store_from_config(store.config.offline_store) + self.host = host + self.tls_certificates = tls_certificates def arrow_flight_auth_middleware( self, @@ -81,8 +93,13 @@ def descriptor_to_key(self, descriptor: fl.FlightDescriptor): ) def 
_make_flight_info(self, key: Any, descriptor: fl.FlightDescriptor): - endpoints = [fl.FlightEndpoint(repr(key), [self._location])] - # TODO calculate actual schema from the given features + if len(self.tls_certificates) != 0: + location = fl.Location.for_grpc_tls(self.host, self.port) + else: + location = fl.Location.for_grpc_tcp(self.host, self.port) + endpoints = [ + fl.FlightEndpoint(repr(key), [location]), + ] schema = pa.schema([]) return fl.FlightInfo(schema, descriptor, endpoints, -1, -1) @@ -549,11 +566,33 @@ def start_server( store: FeatureStore, host: str, port: int, + tls_key_path: str = "", + tls_cert_path: str = "", + tls_verify_client: bool = True, ): _init_auth_manager(store) - location = "grpc+tcp://{}:{}".format(host, port) - server = OfflineServer(store, location) + tls_certificates = [] + scheme = "grpc+tcp" + if tls_key_path and tls_cert_path: + logger.info( + "Found SSL certificates in the args so going to start offline server in TLS(SSL) mode." + ) + scheme = "grpc+tls" + with open(tls_cert_path, "rb") as cert_file: + tls_cert_chain = cert_file.read() + with open(tls_key_path, "rb") as key_file: + tls_private_key = key_file.read() + tls_certificates.append((tls_cert_chain, tls_private_key)) + + location = "{}://{}:{}".format(scheme, host, port) + server = OfflineServer( + store, + location=location, + host=host, + tls_certificates=tls_certificates, + verify_client=tls_verify_client, + ) try: logger.info(f"Offline store server serving at: {location}") server.serve() diff --git a/sdk/python/tests/integration/feature_repos/repo_configuration.py b/sdk/python/tests/integration/feature_repos/repo_configuration.py index c688a84836..bf46468160 100644 --- a/sdk/python/tests/integration/feature_repos/repo_configuration.py +++ b/sdk/python/tests/integration/feature_repos/repo_configuration.py @@ -49,6 +49,7 @@ FileDataSourceCreator, RemoteOfflineOidcAuthStoreDataSourceCreator, RemoteOfflineStoreDataSourceCreator, + RemoteOfflineTlsStoreDataSourceCreator, 
) from tests.integration.feature_repos.universal.data_sources.redshift import ( RedshiftDataSourceCreator, @@ -131,6 +132,7 @@ ("local", DuckDBDeltaDataSourceCreator), ("local", RemoteOfflineStoreDataSourceCreator), ("local", RemoteOfflineOidcAuthStoreDataSourceCreator), + ("local", RemoteOfflineTlsStoreDataSourceCreator), ] if os.getenv("FEAST_IS_LOCAL_TEST", "False") == "True": diff --git a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py index 35325c2737..dc716f45e1 100644 --- a/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py +++ b/sdk/python/tests/integration/feature_repos/universal/data_sources/file.py @@ -34,6 +34,7 @@ DataSourceCreator, ) from tests.utils.auth_permissions_util import include_auth_config +from tests.utils.generate_self_signed_certifcate_util import generate_self_signed_cert from tests.utils.http_server import check_port_open, free_port # noqa: E402 logger = logging.getLogger(__name__) @@ -410,11 +411,73 @@ def setup(self, registry: RegistryConfig): ) return "grpc+tcp://{}:{}".format(host, self.server_port) + +class RemoteOfflineTlsStoreDataSourceCreator(FileDataSourceCreator): + def __init__(self, project_name: str, *args, **kwargs): + super().__init__(project_name) + self.server_port: int = 0 + self.proc: Optional[Popen[bytes]] = None + + def setup(self, registry: RegistryConfig): + parent_offline_config = super().create_offline_store_config() + config = RepoConfig( + project=self.project_name, + provider="local", + offline_store=parent_offline_config, + registry=registry.path, + entity_key_serialization_version=2, + ) + + certificates_path = tempfile.mkdtemp() + tls_key_path = os.path.join(certificates_path, "key.pem") + self.tls_cert_path = os.path.join(certificates_path, "cert.pem") + generate_self_signed_cert(cert_path=self.tls_cert_path, key_path=tls_key_path) + + repo_path = Path(tempfile.mkdtemp()) + with 
open(repo_path / "feature_store.yaml", "w") as outfile: + yaml.dump(config.model_dump(by_alias=True), outfile) + repo_path = repo_path.resolve() + + self.server_port = free_port() + host = "0.0.0.0" + cmd = [ + "feast", + "-c" + str(repo_path), + "serve_offline", + "--host", + host, + "--port", + str(self.server_port), + "--key", + str(tls_key_path), + "--cert", + str(self.tls_cert_path), + # This is needed for the self-signed certificate, disabled verify_client for integration tests. + "--verify_client", + str(False), + ] + self.proc = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL + ) + + _time_out_sec: int = 60 + # Wait for server to start + wait_retry_backoff( + lambda: (None, check_port_open(host, self.server_port)), + timeout_secs=_time_out_sec, + timeout_msg=f"Unable to start the feast remote offline server in {_time_out_sec} seconds at port={self.server_port}", + ) + return "grpc+tls://{}:{}".format(host, self.server_port) + def create_offline_store_config(self) -> FeastConfigBaseModel: - self.remote_offline_store_config = RemoteOfflineStoreConfig( - type="remote", host="0.0.0.0", port=self.server_port + remote_offline_store_config = RemoteOfflineStoreConfig( + type="remote", + host="0.0.0.0", + port=self.server_port, + scheme="https", + cert=self.tls_cert_path, ) - return self.remote_offline_store_config + return remote_offline_store_config def teardown(self): super().teardown() @@ -499,10 +562,10 @@ def setup(self, registry: RegistryConfig): return "grpc+tcp://{}:{}".format(host, self.server_port) def create_offline_store_config(self) -> FeastConfigBaseModel: - self.remote_offline_store_config = RemoteOfflineStoreConfig( + remote_offline_store_config = RemoteOfflineStoreConfig( type="remote", host="0.0.0.0", port=self.server_port ) - return self.remote_offline_store_config + return remote_offline_store_config def get_keycloak_url(self): return self.keycloak_url diff --git 
a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py index 97ad54251f..3f28245f3c 100644 --- a/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py +++ b/sdk/python/tests/integration/offline_store/test_universal_historical_retrieval.py @@ -23,6 +23,7 @@ from tests.integration.feature_repos.universal.data_sources.file import ( RemoteOfflineOidcAuthStoreDataSourceCreator, RemoteOfflineStoreDataSourceCreator, + RemoteOfflineTlsStoreDataSourceCreator, ) from tests.integration.feature_repos.universal.data_sources.snowflake import ( SnowflakeDataSourceCreator, @@ -166,6 +167,7 @@ def test_historical_features_main( environment.data_source_creator, ( RemoteOfflineStoreDataSourceCreator, + RemoteOfflineTlsStoreDataSourceCreator, RemoteOfflineOidcAuthStoreDataSourceCreator, ), ): diff --git a/sdk/python/tests/utils/generate_self_signed_certifcate_util.py b/sdk/python/tests/utils/generate_self_signed_certifcate_util.py index 1b0b212818..559ee18cde 100644 --- a/sdk/python/tests/utils/generate_self_signed_certifcate_util.py +++ b/sdk/python/tests/utils/generate_self_signed_certifcate_util.py @@ -1,3 +1,4 @@ +import ipaddress import logging from datetime import datetime, timedelta @@ -36,6 +37,14 @@ def generate_self_signed_cert( ] ) + # Define the certificate's Subject Alternative Names (SANs) + alt_names = [ + x509.DNSName("localhost"), # Hostname + x509.IPAddress(ipaddress.IPv4Address("127.0.0.1")), # Localhost IP + x509.IPAddress(ipaddress.IPv4Address("0.0.0.0")), # Bind-all IP (optional) + ] + san = x509.SubjectAlternativeName(alt_names) + certificate = ( x509.CertificateBuilder() .subject_name(subject) @@ -47,10 +56,7 @@ def generate_self_signed_cert( # Certificate valid for 1 year datetime.utcnow() + timedelta(days=365) ) - .add_extension( - x509.SubjectAlternativeName([x509.DNSName(common_name)]), - critical=False, - ) + 
.add_extension(san, critical=False) .sign(key, hashes.SHA256(), default_backend()) )