From 60af95f55c92a611d26a2545d166c3657134b4c9 Mon Sep 17 00:00:00 2001 From: kabinja Date: Mon, 29 Jan 2024 14:52:08 +0100 Subject: [PATCH 01/27] add active_stack to run_config --- my_steps.py | 8 +++++ .../config/pipeline_run_configuration.py | 1 + src/zenml/new/pipelines/pipeline.py | 22 +++++++++++-- src/zenml/stack/utils.py | 25 +++++++++++++- .../functional/cli/test_pipeline.py | 33 +++++++++++++++++++ 5 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 my_steps.py diff --git a/my_steps.py b/my_steps.py new file mode 100644 index 0000000000..a9decd0bfc --- /dev/null +++ b/my_steps.py @@ -0,0 +1,8 @@ +from zenml.steps import step +@step +def s1() -> int: + return 1 + +@step +def s2(inp: int) -> None: + pass \ No newline at end of file diff --git a/src/zenml/config/pipeline_run_configuration.py b/src/zenml/config/pipeline_run_configuration.py index b9f50d7a09..dcf1d454ac 100644 --- a/src/zenml/config/pipeline_run_configuration.py +++ b/src/zenml/config/pipeline_run_configuration.py @@ -29,6 +29,7 @@ class PipelineRunConfiguration( ): """Class for pipeline run configurations.""" + active_stack: Optional[str] = None run_name: Optional[str] = None enable_cache: Optional[bool] = None enable_artifact_metadata: Optional[bool] = None diff --git a/src/zenml/new/pipelines/pipeline.py b/src/zenml/new/pipelines/pipeline.py index 985fa265f7..f449bb248a 100644 --- a/src/zenml/new/pipelines/pipeline.py +++ b/src/zenml/new/pipelines/pipeline.py @@ -73,6 +73,7 @@ from zenml.new.pipelines.model_utils import NewModelRequest from zenml.orchestrators.utils import get_run_name from zenml.stack import Stack +from zenml.stack.utils import stack_context from zenml.steps import BaseStep from zenml.steps.entrypoint_function_utils import ( StepArtifact, @@ -534,7 +535,9 @@ def build( Returns: The build output. """ - with track_handler(event=AnalyticsEvent.BUILD_PIPELINE): + with track_handler( + event=AnalyticsEvent.BUILD_PIPELINE + ), stack_context(): self._prepare_if_possible() deployment, pipeline_spec, _, _ = self._compile( config_path=config_path, @@ -620,7 +623,9 @@ def _run( logger.info(f"Initiating a new run for the pipeline: `{self.name}`.") - with track_handler(AnalyticsEvent.RUN_PIPELINE) as analytics_handler: + with track_handler( + AnalyticsEvent.RUN_PIPELINE + ) as analytics_handler, stack_context(): deployment, pipeline_spec, schedule, build = self._compile( config_path=config_path, run_name=run_name, @@ -1151,6 +1156,8 @@ def _compile( # Update with the values in code so they take precedence run_config = pydantic_utils.update_model(run_config, update=update) + self._update_stack_from_config(run_config) + deployment, pipeline_spec = Compiler().compile( pipeline=self, stack=Client().active_stack, @@ -1560,3 +1567,14 @@ def _prepare_if_possible(self) -> None: ) else: self.prepare() + + def _update_stack_from_config( + self, run_configuration: PipelineRunConfiguration + ) -> None: + """Active the stack from the pupeline run configuation if one is given. + + Args: + run_configuration: The run configuration for this pipeline. + """ + if run_configuration.active_stack is not None: + Client().activate_stack(run_configuration.active_stack) diff --git a/src/zenml/stack/utils.py b/src/zenml/stack/utils.py index fa1cc7da77..7589f7754f 100644 --- a/src/zenml/stack/utils.py +++ b/src/zenml/stack/utils.py @@ -13,7 +13,8 @@ # permissions and limitations under the License. 
"""Util functions for handling stacks, components, and flavors.""" -from typing import Any, Dict, Optional +from types import TracebackType +from typing import Any, Dict, Optional, Type from zenml.client import Client from zenml.enums import StackComponentType, StoreType @@ -139,3 +140,25 @@ def get_flavor_by_name_and_type_from_zen_store( f"'{component_type}' exists." ) return flavors[0] + + +class stack_context: + """Context handler to reset the original active stack.""" + + def __init__(self) -> None: + """Constructor for stack_context saves active stack.""" + self._default_stack = Client().active_stack + + def __enter__(self): + """Enters in the stack context.""" + pass + + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> None: + """Restores the original active stak.""" + if self._default_stack.id != Client().active_stack: + Client().activate_stack(self._default_stack.id) diff --git a/tests/integration/functional/cli/test_pipeline.py b/tests/integration/functional/cli/test_pipeline.py index 76aab5720c..3304b4fd51 100644 --- a/tests/integration/functional/cli/test_pipeline.py +++ b/tests/integration/functional/cli/test_pipeline.py @@ -368,6 +368,39 @@ def test_pipeline_run_with_config_file(clean_client: "Client", tmp_path): assert runs[0].name == "custom_run_name" +def test_pipeline_run_with_different_stack_in_config_file( + clean_client: "Client", tmp_path +): + """Tests that the run command works with a run config file with an active stack defined.""" + runner = CliRunner() + run_command = cli.commands["pipeline"].commands["run"] + + pipeline_id = pipeline_instance.register().id + + components = { + key: components[0].id + for key, components in Client().active_stack_model.components.items() + } + new_stack = Client().create_stack(name="new", components=components) + + config_path = tmp_path / "config.yaml" + run_config = PipelineRunConfiguration( + run_name="custom_run_name", active_stack=str(new_stack.id) + ) + config_path.write_text(run_config.yaml()) + + result = runner.invoke( + run_command, [pipeline_instance.name, "--config", str(config_path)] + ) + assert result.exit_code == 0 + + runs = Client().list_pipeline_runs(pipeline_id=pipeline_id) + assert len(runs) == 1 + assert runs[0].name == "custom_run_name" + assert runs[0].stack.id == new_stack.id + assert Client().active_stack.id != new_stack.id + + def test_pipeline_run_with_different_stack(clean_client: "Client"): """Tests that the run command works with a different stack.""" runner = CliRunner() From ac820e59371ff5af1d1f28b3919b6988b9e8b077 Mon Sep 17 00:00:00 2001 From: kabinja Date: Mon, 29 Jan 2024 14:54:07 +0100 Subject: [PATCH 02/27] remove test generated file --- my_steps.py | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 my_steps.py diff --git a/my_steps.py b/my_steps.py deleted file mode 100644 index a9decd0bfc..0000000000 --- a/my_steps.py +++ /dev/null @@ -1,8 +0,0 @@ -from zenml.steps import step -@step -def s1() -> int: - return 1 - -@step -def s2(inp: int) -> None: - pass \ No newline at end of file From 9f765598c8c1c338a2aba8c09a699fb1d2d87c25 Mon Sep 17 00:00:00 2001 From: kabinja Date: Mon, 29 Jan 2024 15:31:55 +0100 Subject: [PATCH 03/27] fix typos --- src/zenml/new/pipelines/pipeline.py | 2 +- src/zenml/stack/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zenml/new/pipelines/pipeline.py b/src/zenml/new/pipelines/pipeline.py index f449bb248a..242a662e70 100644 
--- a/src/zenml/new/pipelines/pipeline.py +++ b/src/zenml/new/pipelines/pipeline.py @@ -1571,7 +1571,7 @@ def _prepare_if_possible(self) -> None: def _update_stack_from_config( self, run_configuration: PipelineRunConfiguration ) -> None: - """Active the stack from the pupeline run configuation if one is given. + """Active the stack from the pipeline run configuation if one is given. Args: run_configuration: The run configuration for this pipeline. diff --git a/src/zenml/stack/utils.py b/src/zenml/stack/utils.py index 7589f7754f..84165884fc 100644 --- a/src/zenml/stack/utils.py +++ b/src/zenml/stack/utils.py @@ -159,6 +159,6 @@ def __exit__( exc_value: Optional[BaseException], traceback: Optional[TracebackType], ) -> None: - """Restores the original active stak.""" + """Restores the original active stack.""" if self._default_stack.id != Client().active_stack: Client().activate_stack(self._default_stack.id) From fd1f482bce086e36ee44c9ebbd803a6b59d80a19 Mon Sep 17 00:00:00 2001 From: kabinja Date: Mon, 29 Jan 2024 15:34:34 +0100 Subject: [PATCH 04/27] fix typo --- src/zenml/new/pipelines/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zenml/new/pipelines/pipeline.py b/src/zenml/new/pipelines/pipeline.py index 242a662e70..0f4822fce9 100644 --- a/src/zenml/new/pipelines/pipeline.py +++ b/src/zenml/new/pipelines/pipeline.py @@ -1571,7 +1571,7 @@ def _prepare_if_possible(self) -> None: def _update_stack_from_config( self, run_configuration: PipelineRunConfiguration ) -> None: - """Active the stack from the pipeline run configuation if one is given. + """Activate the stack from the pipeline run configuation if one is given. Args: run_configuration: The run configuration for this pipeline. From a98112ce80fefdd214320004a8aaba555409a9a7 Mon Sep 17 00:00:00 2001 From: Renaud Rwemalika Date: Fri, 2 Feb 2024 13:34:52 +0100 Subject: [PATCH 05/27] Update src/zenml/new/pipelines/pipeline.py fix typo in docstring Co-authored-by: Alex Strick van Linschoten --- src/zenml/new/pipelines/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zenml/new/pipelines/pipeline.py b/src/zenml/new/pipelines/pipeline.py index 70e4ab2650..c0dedc97fc 100644 --- a/src/zenml/new/pipelines/pipeline.py +++ b/src/zenml/new/pipelines/pipeline.py @@ -1575,7 +1575,7 @@ def _prepare_if_possible(self) -> None: def _update_stack_from_config( self, run_configuration: PipelineRunConfiguration ) -> None: - """Activate the stack from the pipeline run configuation if one is given. + """Activate the stack from the pipeline run configuration if one is given. Args: run_configuration: The run configuration for this pipeline. 
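The `stack_context` handler these patches introduce is the classic save/restore context-manager pattern: the constructor snapshots the currently active stack, `_update_stack_from_config` may then activate a different one during compilation, and `__exit__` switches back whether the run succeeded or raised. A minimal, dependency-free sketch of that cycle (everything here, including the class and the dict standing in for `Client` state, is illustrative rather than the code from the patch):

```python
from types import TracebackType
from typing import Dict, Optional, Type


class restore_active_stack:
    """Illustrative stand-in for `stack_context`: snapshot on construction,
    restore on exit, even if the body raised an exception."""

    def __init__(self, state: Dict[str, str]) -> None:
        # Snapshot taken eagerly, like `Client().active_stack` in the patch.
        self._state = state
        self._original = state["active_stack"]

    def __enter__(self) -> None:
        pass  # nothing to do on entry; the snapshot already exists

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_value: Optional[BaseException],
        traceback: Optional[TracebackType],
    ) -> None:
        # Mirrors the `Client().activate_stack(...)` call in the patch.
        if self._state["active_stack"] != self._original:
            self._state["active_stack"] = self._original


state = {"active_stack": "default"}
with restore_active_stack(state):
    state["active_stack"] = "staging"  # what `_update_stack_from_config` does
assert state["active_stack"] == "default"  # the original stack is active again
```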
From c2d77745843bbcba3604bdbbd63b2c271e9d2918 Mon Sep 17 00:00:00 2001 From: Renaud Rwemalika Date: Fri, 2 Feb 2024 13:35:10 +0100 Subject: [PATCH 06/27] Update src/zenml/stack/utils.py add missing return type Co-authored-by: Alex Strick van Linschoten --- src/zenml/stack/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zenml/stack/utils.py b/src/zenml/stack/utils.py index 84165884fc..264a47d0ac 100644 --- a/src/zenml/stack/utils.py +++ b/src/zenml/stack/utils.py @@ -149,7 +149,7 @@ def __init__(self) -> None: """Constructor for stack_context saves active stack.""" self._default_stack = Client().active_stack - def __enter__(self): + def __enter__(self) -> None: """Enters in the stack context.""" pass From 8242efb0d256206f8c8be140eecb45bcd010033e Mon Sep 17 00:00:00 2001 From: kabinja Date: Sun, 4 Feb 2024 20:02:52 +0100 Subject: [PATCH 07/27] fix doc string for __exit__ method of stack_context --- src/zenml/stack/utils.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/zenml/stack/utils.py b/src/zenml/stack/utils.py index 264a47d0ac..93c1b2404e 100644 --- a/src/zenml/stack/utils.py +++ b/src/zenml/stack/utils.py @@ -155,10 +155,18 @@ def __enter__(self) -> None: def __exit__( self, - exc_type: Optional[Type[BaseException]], - exc_value: Optional[BaseException], + exception_type: Optional[Type[BaseException]], + exception_value: Optional[BaseException], traceback: Optional[TracebackType], ) -> None: - """Restores the original active stack.""" + """Get a stack component flavor by name and type from a ZenStore. + + Args: + exception_type: Type of the execption that was raised. + None if no execption. + exception_value: Type of exception that was raised. + e.g., divide_by_zero error. None if no exception. + traceback: Traceback report. None if no excpetion. + """ if self._default_stack.id != Client().active_stack: Client().activate_stack(self._default_stack.id) From fbe12e00d7f5168205808585c540e3ad2f93c952 Mon Sep 17 00:00:00 2001 From: kabinja Date: Sun, 4 Feb 2024 20:07:35 +0100 Subject: [PATCH 08/27] fix linting --- src/zenml/stack/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zenml/stack/utils.py b/src/zenml/stack/utils.py index 93c1b2404e..b962319700 100644 --- a/src/zenml/stack/utils.py +++ b/src/zenml/stack/utils.py @@ -162,11 +162,11 @@ def __exit__( """Get a stack component flavor by name and type from a ZenStore. Args: - exception_type: Type of the execption that was raised. + exception_type: Type of the execption that was raised. None if no execption. exception_value: Type of exception that was raised. e.g., divide_by_zero error. None if no exception. traceback: Traceback report. None if no excpetion. 
- """ + """ if self._default_stack.id != Client().active_stack: Client().activate_stack(self._default_stack.id) From 80a578daaa5d67342a0af835f96331f12f1fc161 Mon Sep 17 00:00:00 2001 From: kabinja Date: Sun, 4 Feb 2024 20:34:34 +0100 Subject: [PATCH 09/27] add documentation --- .../pipelining-features/configure-steps-pipelines.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md b/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md index 439ce16ab4..a4d2ee3ecc 100644 --- a/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md +++ b/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md @@ -132,6 +132,7 @@ python run.py An example of a generated YAML configuration template ```yaml +active_stack: Optional[str] build: Union[PipelineBuildBase, UUID, NoneType] enable_artifact_metadata: Optional[bool] enable_artifact_visualization: Optional[bool] @@ -330,6 +331,12 @@ These are boolean flags for various configurations: * `enable_cache`: Utilize [caching](../../starter-guide/cache-previous-executions.md) or not. * `enable_step_logs`: Enable tracking [step logs](managing-steps.md#enable-or-disable-logs-storing). +### `active_stack` name or ID + +The name of the UUID of the `active stack` to use for this +pipeline. If specified the active stack is set for the duration of the pipeline execution and restored upon +completion. If not specified, the current active stack is used. + ### `build` ID The UUID of the [`build`](../infrastructure-management/containerize-your-pipeline.md) to use for this pipeline. If specified, Docker image building is skipped for remote orchestrators, and the Docker image specified in this build is used. From 001a0314fafc3b0b2222513525c71ae499ca86c9 Mon Sep 17 00:00:00 2001 From: Renaud Rwemalika Date: Mon, 5 Feb 2024 14:32:19 +0100 Subject: [PATCH 10/27] Update docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md Co-authored-by: Alex Strick van Linschoten --- .../pipelining-features/configure-steps-pipelines.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md b/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md index a4d2ee3ecc..a11db601a7 100644 --- a/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md +++ b/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md @@ -333,7 +333,7 @@ These are boolean flags for various configurations: ### `active_stack` name or ID -The name of the UUID of the `active stack` to use for this +The name or the UUID of the `active stack` to use for this pipeline. If specified the active stack is set for the duration of the pipeline execution and restored upon completion. If not specified, the current active stack is used. 
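Combined with the documentation added above, a run configuration file using the new `active_stack` key might be used as in the following sketch, via the `with_options(config_path=...)` mechanism this guide documents. The stack name `staging_stack`, the file name, and the pipeline itself are assumptions for illustration; only the `active_stack` key comes from this change:

```python
from pathlib import Path

from zenml import pipeline, step


@step
def trainer() -> int:
    return 42


@pipeline
def training_pipeline() -> None:
    trainer()


# `active_stack` accepts a stack name or UUID. The named stack is activated
# for the duration of the run and the previously active stack is restored
# once the run completes.
Path("config.yaml").write_text(
    "active_stack: staging_stack\n"
    "run_name: staging_smoke_test\n"
    "enable_cache: false\n"
)

if __name__ == "__main__":
    training_pipeline.with_options(config_path="config.yaml")()
```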
From 323d33c03744a4b957c0921a6280496ec4ffbbce Mon Sep 17 00:00:00 2001 From: Renaud Rwemalika Date: Mon, 5 Feb 2024 14:32:26 +0100 Subject: [PATCH 11/27] Update docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md Co-authored-by: Alex Strick van Linschoten --- .../pipelining-features/configure-steps-pipelines.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md b/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md index a11db601a7..c8d42c199b 100644 --- a/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md +++ b/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md @@ -334,7 +334,7 @@ These are boolean flags for various configurations: ### `active_stack` name or ID The name or the UUID of the `active stack` to use for this -pipeline. If specified the active stack is set for the duration of the pipeline execution and restored upon +pipeline. If specified, the active stack is set for the duration of the pipeline execution and restored upon completion. If not specified, the current active stack is used. ### `build` ID From 8959f073a5b1df3078c589a39cde96f4ea960b2f Mon Sep 17 00:00:00 2001 From: Renaud Rwemalika Date: Mon, 5 Feb 2024 14:32:36 +0100 Subject: [PATCH 12/27] Update src/zenml/stack/utils.py Co-authored-by: Alex Strick van Linschoten --- src/zenml/stack/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zenml/stack/utils.py b/src/zenml/stack/utils.py index b962319700..04fb98919d 100644 --- a/src/zenml/stack/utils.py +++ b/src/zenml/stack/utils.py @@ -162,8 +162,8 @@ def __exit__( """Get a stack component flavor by name and type from a ZenStore. Args: - exception_type: Type of the execption that was raised. - None if no execption. + exception_type: Type of the exception that was raised. + None if no exception. exception_value: Type of exception that was raised. e.g., divide_by_zero error. None if no exception. traceback: Traceback report. None if no excpetion. From dfc3115bc236357d33b653994b201442585c5581 Mon Sep 17 00:00:00 2001 From: Renaud Rwemalika Date: Mon, 5 Feb 2024 14:32:44 +0100 Subject: [PATCH 13/27] Update src/zenml/stack/utils.py Co-authored-by: Alex Strick van Linschoten --- src/zenml/stack/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zenml/stack/utils.py b/src/zenml/stack/utils.py index 04fb98919d..35868ef943 100644 --- a/src/zenml/stack/utils.py +++ b/src/zenml/stack/utils.py @@ -166,7 +166,7 @@ def __exit__( None if no exception. exception_value: Type of exception that was raised. e.g., divide_by_zero error. None if no exception. - traceback: Traceback report. None if no excpetion. + traceback: Traceback report. None if no exception. 
""" if self._default_stack.id != Client().active_stack: Client().activate_stack(self._default_stack.id) From f141fedf109c17b66e34889addd66b0d705a5db9 Mon Sep 17 00:00:00 2001 From: Alex Strick van Linschoten Date: Tue, 6 Feb 2024 16:22:26 +0100 Subject: [PATCH 14/27] update TOC (#2406) --- docs/book/toc.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/book/toc.md b/docs/book/toc.md index 4d2df6de5b..b560ade1ef 100644 --- a/docs/book/toc.md +++ b/docs/book/toc.md @@ -94,6 +94,7 @@ * [Tekton Orchestrator](stacks-and-components/component-guide/orchestrators/tekton.md) * [Airflow Orchestrator](stacks-and-components/component-guide/orchestrators/airflow.md) * [Skypilot VM Orchestrator](stacks-and-components/component-guide/orchestrators/skypilot-vm.md) + * [HyperAI Orchestrator](stacks-and-components/component-guide/orchestrators/hyperai.md) * [Develop a custom orchestrator](stacks-and-components/component-guide/orchestrators/custom.md) * [Artifact Stores](stacks-and-components/component-guide/artifact-stores/artifact-stores.md) * [Local Artifact Store](stacks-and-components/component-guide/artifact-stores/local.md) From 2cdcfaccef9568fcef88e075b5d95ae1d62045fa Mon Sep 17 00:00:00 2001 From: kabinja Date: Tue, 6 Feb 2024 19:06:49 +0100 Subject: [PATCH 15/27] fix docstring indentation --- src/zenml/stack/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zenml/stack/utils.py b/src/zenml/stack/utils.py index 35868ef943..6a85dab807 100644 --- a/src/zenml/stack/utils.py +++ b/src/zenml/stack/utils.py @@ -163,9 +163,9 @@ def __exit__( Args: exception_type: Type of the exception that was raised. - None if no exception. + None if no exception. exception_value: Type of exception that was raised. - e.g., divide_by_zero error. None if no exception. + e.g., divide_by_zero error. None if no exception. traceback: Traceback report. None if no exception. 
""" if self._default_stack.id != Client().active_stack: From 590b07403abc8debf1c4187ed6caea14b3be4e42 Mon Sep 17 00:00:00 2001 From: Stefan Nica Date: Mon, 5 Feb 2024 15:10:28 +0100 Subject: [PATCH 16/27] Fix GCP service connector login to overwrite existing valid credentials (#2392) --- .../integrations/gcp/service_connectors/gcp_service_connector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/zenml/integrations/gcp/service_connectors/gcp_service_connector.py b/src/zenml/integrations/gcp/service_connectors/gcp_service_connector.py index d571f92ea0..c0953003a9 100644 --- a/src/zenml/integrations/gcp/service_connectors/gcp_service_connector.py +++ b/src/zenml/integrations/gcp/service_connectors/gcp_service_connector.py @@ -1339,6 +1339,7 @@ def _configure_local_client( "gcloud", "auth", "login", + "--quiet", "--cred-file", adc_path, ], From b64fae9eee6464534b3b826b2cfd40b84620e986 Mon Sep 17 00:00:00 2001 From: Andrei Vishniakov <31008759+avishniakov@users.noreply.github.com> Date: Mon, 5 Feb 2024 21:32:27 +0100 Subject: [PATCH 17/27] Update `has_custom_name` for legacy artifacts (#2384) * update `has_custom_name` for legacy artifacts * add test * handle special characters in artifact names * update test signature * update test signature --------- Co-authored-by: Safoine El Khabich <34200873+safoinme@users.noreply.github.com> --- src/zenml/artifacts/utils.py | 4 +++ src/zenml/client.py | 3 ++ src/zenml/models/v2/core/artifact.py | 1 + src/zenml/orchestrators/output_utils.py | 4 +++ .../zen_stores/schemas/artifact_schemas.py | 2 ++ .../artifacts/test_artifact_config.py | 36 +++++++++++++++++++ tests/unit/orchestrators/test_output_utils.py | 12 +++---- 7 files changed, 54 insertions(+), 8 deletions(-) diff --git a/src/zenml/artifacts/utils.py b/src/zenml/artifacts/utils.py index 6ec3403c2a..41d7e1ce0a 100644 --- a/src/zenml/artifacts/utils.py +++ b/src/zenml/artifacts/utils.py @@ -189,6 +189,10 @@ def save_artifact( # Get or create the artifact try: artifact = client.list_artifacts(name=name)[0] + if artifact.has_custom_name != has_custom_name: + client.update_artifact( + name_id_or_prefix=artifact.id, has_custom_name=has_custom_name + ) except IndexError: artifact = client.zen_store.create_artifact( ArtifactRequest( diff --git a/src/zenml/client.py b/src/zenml/client.py index ef5d9e3a81..9830fc25a7 100644 --- a/src/zenml/client.py +++ b/src/zenml/client.py @@ -2761,6 +2761,7 @@ def update_artifact( new_name: Optional[str] = None, add_tags: Optional[List[str]] = None, remove_tags: Optional[List[str]] = None, + has_custom_name: Optional[bool] = None, ) -> ArtifactResponse: """Update an artifact. @@ -2769,6 +2770,7 @@ def update_artifact( new_name: The new name of the artifact. add_tags: Tags to add to the artifact. remove_tags: Tags to remove from the artifact. + has_custom_name: Whether the artifact has a custom name. Returns: The updated artifact. 
@@ -2778,6 +2780,7 @@ def update_artifact( name=new_name, add_tags=add_tags, remove_tags=remove_tags, + has_custom_name=has_custom_name, ) return self.zen_store.update_artifact( artifact_id=artifact.id, artifact_update=artifact_update diff --git a/src/zenml/models/v2/core/artifact.py b/src/zenml/models/v2/core/artifact.py index 3938dbff1f..8fb548924d 100644 --- a/src/zenml/models/v2/core/artifact.py +++ b/src/zenml/models/v2/core/artifact.py @@ -60,6 +60,7 @@ class ArtifactUpdate(BaseModel): name: Optional[str] = None add_tags: Optional[List[str]] = None remove_tags: Optional[List[str]] = None + has_custom_name: Optional[bool] = None # ------------------ Response Model ------------------ diff --git a/src/zenml/orchestrators/output_utils.py b/src/zenml/orchestrators/output_utils.py index 0bf5385cf6..21f8b74ccb 100644 --- a/src/zenml/orchestrators/output_utils.py +++ b/src/zenml/orchestrators/output_utils.py @@ -15,6 +15,7 @@ import os from typing import TYPE_CHECKING, Dict, Sequence +from uuid import uuid4 from zenml.io import fileio from zenml.logger import get_logger @@ -44,11 +45,14 @@ def generate_artifact_uri( Returns: The URI of the output artifact. """ + for banned_character in ["<", ">", ":", '"', "/", "\\", "|", "?", "*"]: + output_name = output_name.replace(banned_character, "_") return os.path.join( artifact_store.path, step_run.name, output_name, str(step_run.id), + str(uuid4())[:8], # add random subfolder to avoid collisions ) diff --git a/src/zenml/zen_stores/schemas/artifact_schemas.py b/src/zenml/zen_stores/schemas/artifact_schemas.py index a7a60574bd..93a1a81394 100644 --- a/src/zenml/zen_stores/schemas/artifact_schemas.py +++ b/src/zenml/zen_stores/schemas/artifact_schemas.py @@ -143,6 +143,8 @@ def update(self, artifact_update: ArtifactUpdate) -> "ArtifactSchema": if artifact_update.name: self.name = artifact_update.name self.has_custom_name = True + if artifact_update.has_custom_name is not None: + self.has_custom_name = artifact_update.has_custom_name return self diff --git a/tests/integration/functional/artifacts/test_artifact_config.py b/tests/integration/functional/artifacts/test_artifact_config.py index 5d018e9ad5..68990ab768 100644 --- a/tests/integration/functional/artifacts/test_artifact_config.py +++ b/tests/integration/functional/artifacts/test_artifact_config.py @@ -442,3 +442,39 @@ def _inner_pipeline(force_disable_cache: bool = False): assert ( len(mvrm.data_artifact_ids["cacheable"]) == 1 ), f"Failed on {i} run" + + +@step +def standard_name_producer() -> str: + return "standard" + + +@step +def custom_name_producer() -> ( + Annotated[str, "pipeline_::standard_name_producer::output"] +): + return "custom" + + +def test_update_of_has_custom_name(clean_client: "Client"): + """Test that update of has_custom_name works.""" + + @pipeline(enable_cache=False) + def pipeline_(): + standard_name_producer() + + @pipeline(enable_cache=False) + def pipeline_2(): + custom_name_producer() + + # run 2 times to see both ways switching + for i in range(2): + pipeline_() + assert not clean_client.get_artifact( + "pipeline_::standard_name_producer::output" + ).has_custom_name, f"Standard name validation failed in {i+1} run" + + pipeline_2() + assert clean_client.get_artifact( + "pipeline_::standard_name_producer::output" + ).has_custom_name, f"Custom name validation failed in {i+1} run" diff --git a/tests/unit/orchestrators/test_output_utils.py b/tests/unit/orchestrators/test_output_utils.py index e63c4d4607..6bb4d12d48 100644 --- 
a/tests/unit/orchestrators/test_output_utils.py +++ b/tests/unit/orchestrators/test_output_utils.py @@ -14,8 +14,6 @@ import os -import pytest - from zenml.config.step_configurations import Step from zenml.orchestrators import output_utils @@ -41,11 +39,9 @@ def test_output_artifact_preparation(create_step_run, local_stack): "output_name", str(step_run.id), ) + output_artifact_uris["output_name"] = os.path.split( + output_artifact_uris["output_name"] + )[0] + assert output_artifact_uris == {"output_name": expected_path} assert os.path.isdir(expected_path) - - # artifact directory already exists - with pytest.raises(RuntimeError): - output_utils.prepare_output_artifact_uris( - step_run=step_run, stack=local_stack, step=step - ) From ce9a34f1b6613465915b05872e83f87619cf64a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20SERRA?= Date: Tue, 6 Feb 2024 09:07:40 +0100 Subject: [PATCH 18/27] Use native VertexAI scheduler capability instead of old GCP official workaround (#2310) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Use native VertexAI pipelines scheduling facility * deprecate VertexOrchestrator no longer used fields * updating docs * remove dead code * style: :art: linting * Update docs/book/stacks-and-components/component-guide/orchestrators/vertex.md Co-authored-by: Alex Strick van Linschoten * Update docs/book/stacks-and-components/component-guide/orchestrators/vertex.md Co-authored-by: Alex Strick van Linschoten * Update src/zenml/integrations/gcp/orchestrators/vertex_orchestrator.py Co-authored-by: Alex Strick van Linschoten * Remove no longer used libraries * feat: add support for start_time and end_time in the schedule * Update src/zenml/integrations/gcp/orchestrators/vertex_orchestrator.py Co-authored-by: Alex Strick van Linschoten * docs: Add details about schedule parameters * Apply suggestions from code review --------- Co-authored-by: Alex Strick van Linschoten Co-authored-by: Hamza Tahir Co-authored-by: BarฤฑลŸ Can Durak <36421093+bcdurak@users.noreply.github.com> --- .../component-guide/orchestrators/vertex.md | 94 ++----- src/zenml/integrations/gcp/__init__.py | 2 - .../gcp/flavors/vertex_orchestrator_flavor.py | 6 +- .../integrations/gcp/google_cloud_function.py | 187 -------------- .../gcp/google_cloud_scheduler.py | 83 ------ .../gcp/orchestrators/vertex_orchestrator.py | 239 ++++-------------- .../vertex_scheduler/__init__.py | 14 - .../orchestrators/vertex_scheduler/main.py | 91 ------- .../vertex_scheduler/requirements.txt | 2 - 9 files changed, 85 insertions(+), 633 deletions(-) delete mode 100644 src/zenml/integrations/gcp/google_cloud_function.py delete mode 100644 src/zenml/integrations/gcp/google_cloud_scheduler.py delete mode 100644 src/zenml/integrations/gcp/orchestrators/vertex_scheduler/__init__.py delete mode 100644 src/zenml/integrations/gcp/orchestrators/vertex_scheduler/main.py delete mode 100644 src/zenml/integrations/gcp/orchestrators/vertex_scheduler/requirements.txt diff --git a/docs/book/stacks-and-components/component-guide/orchestrators/vertex.md b/docs/book/stacks-and-components/component-guide/orchestrators/vertex.md index 7f46354607..0964861af8 100644 --- a/docs/book/stacks-and-components/component-guide/orchestrators/vertex.md +++ b/docs/book/stacks-and-components/component-guide/orchestrators/vertex.md @@ -66,7 +66,7 @@ To use the Vertex orchestrator, we need: ### GCP credentials and permissions This part is without doubt the most involved part of using the Vertex orchestrator. 
In order to run pipelines on Vertex AI, -you need to have a GCP user account and/or one or more GCP service accounts set up with proper permissions, depending on whether [you want to schedule pipelines](#run-pipelines-on-a-schedule) and depending on whether you wish to practice [the principle of least privilege](https://en.wikipedia.org/wiki/Principle_of_least_privilege) and distribute permissions across multiple service accounts. +you need to have a GCP user account and/or one or more GCP service accounts set up with proper permissions, depending on whether you wish to practice [the principle of least privilege](https://en.wikipedia.org/wiki/Principle_of_least_privilege) and distribute permissions across multiple service accounts. You also have three different options to provide credentials to the orchestrator: @@ -76,7 +76,7 @@ You also have three different options to provide credentials to the orchestrator This section [explains the different components and GCP resources](#vertex-ai-pipeline-components) involved in running a Vertex AI pipeline and what permissions they need, then provides instructions for three different configuration use-cases: -1. [use the local `gcloud` CLI configured with your GCP user account](#configuration-use-case-local-gcloud-cli-with-user-account), without the ability to schedule pipelines +1. [use the local `gcloud` CLI configured with your GCP user account](#configuration-use-case-local-gcloud-cli-with-user-account), including the ability to schedule pipelines 2. [use a GCP Service Connector and a single service account](#configuration-use-case-gcp-service-connector-with-single-service-account) with all permissions, including the ability to schedule pipelines 3. [use a GCP Service Connector and multiple service accounts](#configuration-use-case-gcp-service-connector-with-different-service-accounts) for different permissions, including the ability to schedule pipelines @@ -88,8 +88,6 @@ To understand what accounts you need to provision and why, let's look at the dif building the pipeline Docker image and submitting the pipeline to Vertex AI, among other things. This is usually your local machine or some other environment used to automate running pipelines, like a CI/CD job. This environment needs to be able to authenticate with GCP and needs to have the necessary permissions to create a job in Vertex Pipelines, (e.g. [the `Vertex AI User` role](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.user)). If you are planning to [run pipelines on a schedule](#run-pipelines-on-a-schedule), *the ZenML client environment* also needs additional permissions: - * permissions to create a Google Cloud Function (e.g. with the [`Cloud Functions Developer Role`](https://cloud.google.com/functions/docs/reference/iam/roles#cloudfunctions.developer)). - * permissions to create a Google Cloud Scheduler (e.g. with the [Cloud Scheduler Admin Role](https://cloud.google.com/iam/docs/understanding-roles#cloudscheduler.admin)). * the [`Storage Object Creator Role`](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) to be able to write the pipeline JSON file to the artifact store directly (NOTE: not needed if the Artifact Store is configured with credentials or is linked to Service Connector) 2. *the Vertex AI pipeline environment* is the GCP environment in which the pipeline steps themselves are running in GCP. The Vertex AI pipeline runs in the context of a GCP service account which we'll call here *the workload service account*. 
@@ -97,38 +95,21 @@ needs to be able to authenticate with GCP and needs to have the necessary permis * permissions to run a Vertex AI pipeline, (e.g. [the `Vertex AI Service Agent` role](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.serviceAgent)). -3. *the scheduler Google Cloud Function* is a GCP resource that is used to trigger the pipeline on a schedule. This component is only needed if you intend on running Vertex AI pipelines on a schedule. The scheduler function runs in the context of a GCP service account which we'll call here *the function service account*. *The function service account* can be explicitly configured in the orchestrator configuration via the `function_service_account` parameter. If it is omitted, the orchestrator will use [the Compute Engine default service account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) for the GCP project in which the pipeline is running. This service account needs to have the following permissions: - - * permissions to create a job in Vertex Pipelines, (e.g. [the `Vertex AI User` role](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.user)). - -4. *the Google Cloud Scheduler* is a GCP resource that is used to trigger the pipeline on a schedule. This component is only needed if you intend on running Vertex AI pipelines on a schedule. The scheduler needs a GCP service account to authenticate to *the scheduler Google Cloud Function*. Let's call this service account *the scheduler service account*. *The scheduler service account* can be explicitly configured in the orchestrator configuration via the `scheduler_service_account` parameter. If it is omitted, the orchestrator will use the following, in order of precedence: - * the service account used by *the ZenML client environment* credentials, if present. - * the service account specified in the `function_service_account` parameter. - * the service account specified in the `workload_service_account` parameter. - -*The scheduler service account* must have the following permissions: - -* permissions to trigger the scheduler function, (e.g. [the `Cloud Functions Invoker` role](https://cloud.google.com/functions/docs/reference/iam/roles#cloudfunctions.invoker) and [the `Cloud Run Invoker` role](https://cloud.google.com/run/docs/reference/iam/roles#standard-roles)). -* the [Storage Object Viewer Role](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) to be able to read the pipeline JSON file from the artifact store. - -As you can see, there can be as many as three different service accounts involved in running a Vertex AI pipeline. Four, if you also use a service account to authenticate to GCP in *the ZenML client environment*. However, you can keep it simple an use the same service account everywhere. +As you can see, there can be dedicated service accounts involved in running a Vertex AI pipeline. That's two service accounts if you also use a service account to authenticate to GCP in *the ZenML client environment*. However, you can keep it simple and use the same service account everywhere. #### Configuration use-case: local `gcloud` CLI with user account This configuration use-case assumes you have configured the [`gcloud` CLI](https://cloud.google.com/sdk/gcloud) to authenticate locally with your GCP account (i.e. by running `gcloud auth login`). It also assumes the following: -* you are not planning to run pipelines on a schedule. 
* your GCP account has permissions to create a job in Vertex Pipelines, (e.g. [the `Vertex AI User` role](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.user)). * [the Compute Engine default service account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) for the GCP project in which the pipeline is running is updated with additional permissions required to run a Vertex AI pipeline, (e.g. [the `Vertex AI Service Agent` role](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.serviceAgent)). {% hint style="info" %} This is the easiest way to configure the Vertex AI Orchestrator, but it has the following drawbacks: -* you can't run pipelines on a schedule. * the setup is not portable on other machines and reproducible by other users. * it uses the Compute Engine default service account, which is not recommended, given that it has a lot of permissions by default and is used by many other GCP services. -{% endhint %} We can then register the orchestrator as follows: @@ -142,36 +123,23 @@ zenml orchestrator register \ #### Configuration use-case: GCP Service Connector with single service account -This configuration uses a single GCP service account that has all the permissions needed to run and/or schedule a Vertex AI pipeline. This configuration is useful if you want to run pipelines on a schedule, but don't want to use the Compute Engine default service account. Using [a Service Connector](../../auth-management/auth-management.md) brings the added benefit of making your pipeline fully portable. This use-case assumes you have already configured a GCP service account with the following permissions: * permissions to create a job in Vertex Pipelines, (e.g. [the `Vertex AI User` role](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.user)). * permissions to run a Vertex AI pipeline, (e.g. [the `Vertex AI Service Agent` role](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.serviceAgent)). -* permissions to create a Google Cloud Function (e.g. with the [`Cloud Functions Developer Role`](https://cloud.google.com/functions/docs/reference/iam/roles#cloudfunctions.developer)). * the [Storage Object Creator Role](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) to be able to write the pipeline JSON file to the artifact store directly. -* permissions to trigger the scheduler function, (e.g. [the `Cloud Functions Invoker` role](https://cloud.google.com/functions/docs/reference/iam/roles#cloudfunctions.invoker) and [the `Cloud Run Invoker` role](https://cloud.google.com/run/docs/reference/iam/roles#standard-roles)). -* permissions to create a Google Cloud Scheduler job (e.g. with the [`Cloud Scheduler Admin Role`](https://cloud.google.com/scheduler/docs/quickstart#before_you_begin)). It also assumes you have already created a service account key for this service account and downloaded it to your local machine (e.g. in a `connectors-vertex-ai-workload.json` file). - -{% hint style="info" %} -This setup is portable and reproducible, but it throws all the permissions in a single service account, which is not recommended if you are conscious about security. The principle of least privilege is not applied here and the environment in which the pipeline steps are running has too many permissions that it doesn't need. 
-{% endhint %} - -We can then register [the GCP Service Connector](../../auth-management/gcp-service-connector.md) and Vertex AI orchestrator as follows: - -```shell +This is not recommended if you are conscious about security. The principle of least privilege is not applied here and the environment in which the pipeline steps are running has many permissions that it doesn't need. zenml service-connector register --type gcp --auth-method=service-account --project_id= --service_account_json=@connectors-vertex-ai-workload.json --resource-type gcp-generic zenml orchestrator register \ --flavor=vertex \ --location= \ --synchronous=true \ - --workload_service_account=@.iam.gserviceaccount.com \ - --function_service_account=@.iam.gserviceaccount.com \ - --scheduler_service_account=@.iam.gserviceaccount.com - + --workload_service_account=@.iam.gserviceaccount.com + zenml orchestrator connect --connector ``` #### Configuration use-case: GCP Service Connector with different service accounts @@ -188,19 +156,10 @@ The following GCP service accounts are needed: 1. a "client" service account that has the following permissions: * permissions to create a job in Vertex Pipelines, (e.g. [the `Vertex AI User` role](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.user)). * permissions to create a Google Cloud Function (e.g. with the [`Cloud Functions Developer Role`](https://cloud.google.com/functions/docs/reference/iam/roles#cloudfunctions.developer)). - * permissions to create a Google Cloud Scheduler job (e.g. with the [`Cloud Scheduler Admin Role`](https://cloud.google.com/scheduler/docs/quickstart#before_you_begin)). * the [Storage Object Creator Role](https://cloud.google.com/iam/docs/understanding-roles#storage.objectCreator) to be able to write the pipeline JSON file to the artifact store directly (NOTE: not needed if the Artifact Store is configured with credentials or is linked to Service Connector). 2. a "workload" service account that has permissions to run a Vertex AI pipeline, (e.g. [the `Vertex AI Service Agent` role](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.serviceAgent)). -3. a "function" service account that has the following permissions: - * permissions to create a job in Vertex Pipelines, (e.g. [the `Vertex AI User` role](https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.user)). - * the [Storage Object Viewer Role](https://cloud.google.com/iam/docs/understanding-roles#storage.objectViewer) to be able to read the pipeline JSON file from the artifact store. - - The "client" service account also needs to be granted the `iam.serviceaccounts.actAs` permission on this service account (i.e. the "client" service account needs [the `Service Account User` role](https://cloud.google.com/iam/docs/service-account-permissions#user-role) on the "function" service account). Similarly, the "function" service account also needs to be granted the `iam.serviceaccounts.actAs` permission on the "workload" service account. - -4. a "scheduler" service account that has permissions to trigger the scheduler function, (e.g. [the `Cloud Functions Invoker` role](https://cloud.google.com/functions/docs/reference/iam/roles#cloudfunctions.invoker) and [the `Cloud Run Invoker` role](https://cloud.google.com/run/docs/reference/iam/roles#standard-roles)). The "client" service account also needs to be granted the `iam.serviceaccounts.actAs` permission on this service account (i.e. 
the "client" service account needs [the `Service Account User` role](https://cloud.google.com/iam/docs/service-account-permissions#user-role) on the "scheduler" service account).
-
A key is also needed for the "client" service account. You can create a key for this service account and download it to your local machine (e.g. in a `connectors-vertex-ai-workload.json` file).

With all the service accounts and the key ready, we can register [the GCP Service Connector](../../auth-management/gcp-service-connector.md) and Vertex AI orchestrator as follows:

```shell
zenml service-connector register <CONNECTOR_NAME> --type gcp --auth-method=service-account --project_id=<PROJECT_ID> --service_account_json=@connectors-vertex-ai-workload.json --resource-type gcp-generic
@@ -212,9 +171,7 @@ zenml orchestrator register <ORCHESTRATOR_NAME> \
    --flavor=vertex \
    --location=<GCP_LOCATION> \
    --synchronous=true \
-    --workload_service_account=<SERVICE_ACCOUNT_NAME>@<PROJECT_NAME>.iam.gserviceaccount.com \
-    --function_service_account=<SERVICE_ACCOUNT_NAME>@<PROJECT_NAME>.iam.gserviceaccount.com \
-    --scheduler_service_account=<SERVICE_ACCOUNT_NAME>@<PROJECT_NAME>.iam.gserviceaccount.com
+    --workload_service_account=<SERVICE_ACCOUNT_NAME>@<PROJECT_NAME>.iam.gserviceaccount.com

zenml orchestrator connect <ORCHESTRATOR_NAME> --connector <CONNECTOR_NAME>
```

@@ -258,23 +215,7 @@
orchestrator_url = pipeline_run.run_metadata["orchestrator_url"].value

### Run pipelines on a schedule

-The Vertex Pipelines orchestrator supports running pipelines on a schedule, using logic resembling
-the [official approach recommended by GCP](https://cloud.google.com/vertex-ai/docs/pipelines/schedule-cloud-scheduler).
-
-ZenML utilizes the [Cloud Scheduler](https://cloud.google.com/scheduler)
-and [Cloud Functions](https://cloud.google.com/functions) services to enable scheduling on Vertex Pipelines. The
-following is the sequence of events that happen when running a pipeline on Vertex with a schedule:
-
-* A docker image is created and pushed (see
-  above [containerization](/docs/book/user-guide/advanced-guide/infrastructure-management/containerize-your-pipeline.md)).
-* The Vertex AI pipeline JSON file is copied to
-  the [Artifact Store](../artifact-stores/artifact-stores.md) specified in
-  your [Stack](/docs/book/user-guide/production-guide/understand-stacks.md)
-* Cloud Function is created that creates the Vertex Pipeline job when triggered.
-* A Cloud Scheduler job is created that triggers the Cloud Function on the defined schedule.
-
-Therefore, to run on a schedule, the client environment needs additional permissions and a GCP service account at least is required for the Cloud Scheduler job to be able to authenticate with the Cloud Function, as explained in the
-[GCP credentials and permissions](#gcp-credentials-and-permissions) section.
+The Vertex Pipelines orchestrator supports running pipelines on a schedule using its [native scheduling capability](https://cloud.google.com/vertex-ai/docs/pipelines/schedule-pipeline-run).

**How to schedule a pipeline**

```python
from zenml.config.schedule import Schedule

# Run a pipeline every 5th minute
pipeline_instance.run(
    schedule=Schedule(
        cron_expression="*/5 * * * *"
    )
)
+
+# Run a pipeline every hour
+# starting in one day from now and ending in three days from now
+pipeline_instance.run(
+    schedule=Schedule(
+        cron_expression="0 * * * *",
+        start_time=datetime.datetime.now() + datetime.timedelta(days=1),
+        end_time=datetime.datetime.now() + datetime.timedelta(days=3),
+    )
+)
```

{% hint style="warning" %}
-The Vertex orchestrator only supports the `cron_expression` parameter in the `Schedule` object, and will ignore all
+The Vertex orchestrator only supports the `cron_expression`, `start_time` (optional) and `end_time` (optional) parameters in the `Schedule` object, and will ignore all
other parameters supplied to define the schedule.
{% endhint %}

+The `start_time` and `end_time` timestamp parameters are both optional and are to be specified in local time.
They define the time window in which the pipeline runs will be triggered. If they are not specified, the pipeline will run indefinitely. + +The `cron_expression` parameter [supports timezones](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.schedules). For example, the expression `TZ=Europe/Paris 0 10 * * *` will trigger runs at 10:00 in the Europe/Paris timezone. + **How to delete a scheduled pipeline** Note that ZenML only gets involved to schedule a run, but maintaining the lifecycle of the schedule is the responsibility of the user. -In order to cancel a scheduled Vertex pipeline, you need to manually delete the generated Google Cloud Function, along -with the Cloud Scheduler job that schedules it (via the UI or the CLI). +In order to cancel a scheduled Vertex pipeline, you need to manually delete the schedule in VertexAI (via the UI or the CLI). ### Additional configuration diff --git a/src/zenml/integrations/gcp/__init__.py b/src/zenml/integrations/gcp/__init__.py index d23130dd94..4e9e66e561 100644 --- a/src/zenml/integrations/gcp/__init__.py +++ b/src/zenml/integrations/gcp/__init__.py @@ -49,8 +49,6 @@ class GcpIntegration(Integration): "google-cloud-container>=2.21.0", "google-cloud-storage>=2.9.0", "google-cloud-aiplatform>=1.21.0", # includes shapely pin fix - "google-cloud-scheduler>=2.7.3", - "google-cloud-functions>=1.8.3", "google-cloud-build>=3.11.0", "kubernetes", ] diff --git a/src/zenml/integrations/gcp/flavors/vertex_orchestrator_flavor.py b/src/zenml/integrations/gcp/flavors/vertex_orchestrator_flavor.py index 17236d81c8..8492d89ce6 100644 --- a/src/zenml/integrations/gcp/flavors/vertex_orchestrator_flavor.py +++ b/src/zenml/integrations/gcp/flavors/vertex_orchestrator_flavor.py @@ -133,7 +133,11 @@ class VertexOrchestratorConfig( # type: ignore[misc] # https://github.com/pydan gpu_limit: Optional[int] = None _resource_deprecation = deprecation_utils.deprecate_pydantic_attributes( - "cpu_limit", "memory_limit", "gpu_limit" + "cpu_limit", + "memory_limit", + "gpu_limit", + "function_service_account", + "scheduler_service_account", ) @property diff --git a/src/zenml/integrations/gcp/google_cloud_function.py b/src/zenml/integrations/gcp/google_cloud_function.py deleted file mode 100644 index 99b3247f03..0000000000 --- a/src/zenml/integrations/gcp/google_cloud_function.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright (c) ZenML GmbH 2022. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing -# permissions and limitations under the License. 
-"""Utils for the Google Cloud Functions API.""" - -import os -import tempfile -import time -import zipfile -from typing import TYPE_CHECKING, Optional - -from google.cloud import functions_v2 -from google.cloud.functions_v2.types import ( - BuildConfig, - CreateFunctionRequest, - Function, - GetFunctionRequest, - ServiceConfig, - Source, - StorageSource, -) - -from zenml.io import fileio -from zenml.logger import get_logger - -logger = get_logger(__name__) - -if TYPE_CHECKING: - from google.auth.credentials import Credentials - - -def get_cloud_functions_api( - credentials: Optional["Credentials"] = None, -) -> functions_v2.FunctionServiceClient: - """Gets the cloud functions API resource client. - - Args: - credentials: Google cloud credentials. - - Returns: - Cloud Functions V2 Client. - """ - return functions_v2.FunctionServiceClient(credentials=credentials) - - -def zipdir(path: str, ziph: zipfile.ZipFile) -> None: - """Zips a directory using an Zipfile object. - - Args: - path: Path to zip directory to. - ziph: A `zipfile.Zipfile` file object. - """ - for root, _, files in os.walk(path): - for file in files: - if file != "__init__.py": - ziph.write(os.path.join(root, file), file) - - -def upload_directory( - directory_path: str, - upload_path: str, -) -> StorageSource: - """Uploads local directory to remote one. - - Args: - upload_path: GCS path where to upload the zipped function code. - directory_path: Local path of directory to upload. - - Returns: - Storage source (https://cloud.google.com/functions/docs/reference/rest/v2/projects.locations.functions#StorageSource). - """ - with tempfile.NamedTemporaryFile(delete=False) as f: - with open(f.name, "wb") as data: - with zipfile.ZipFile(data, "w", zipfile.ZIP_DEFLATED) as archive: - zipdir(directory_path, archive) - data.seek(0) - - # Copy and remove - fileio.copy(f.name, upload_path, overwrite=True) - fileio.remove(f.name) - - # Split the path by "/" character - bucket, object_path = upload_path.replace("gs://", "").split( - "/", maxsplit=1 - ) - - return StorageSource( - bucket=bucket, - object_=object_path, - ) - - -def create_cloud_function( - directory_path: str, - upload_path: str, - project: str, - location: str, - function_name: str, - credentials: Optional["Credentials"] = None, - function_service_account_email: Optional[str] = None, - timeout: int = 1800, -) -> str: - """Create google cloud function from specified directory path. - - Args: - directory_path: Local path to directory where function code resides. - upload_path: GCS path where to upload the function code. - project: GCP project ID. - location: GCP location name. - function_name: Name of the function to create. - credentials: Credentials to use for GCP services. - function_service_account_email: The service account email the function will run with. - timeout: Timeout in seconds. - - Returns: - str: URI of the created cloud function. - - Raises: - TimeoutError: If function times out. - RuntimeError: If scheduling runs into a problem. 
- """ - sanitized_function_name = function_name.replace("_", "-") - parent = f"projects/{project}/locations/{location}" - function_full_name = f"{parent}/functions/{sanitized_function_name}" - logger.info(f"Creating Google Cloud Function: {function_full_name}") - - storage_source = upload_directory(directory_path, upload_path) - - # Make the request - get_cloud_functions_api(credentials=credentials).create_function( - request=CreateFunctionRequest( - parent=parent, - function_id=sanitized_function_name, - function=Function( - name=function_full_name, - build_config=BuildConfig( - entry_point="trigger_vertex_job", - runtime="python38", - source=Source(storage_source=storage_source), - ), - service_config=ServiceConfig( - service_account_email=function_service_account_email - ) - if function_service_account_email - else None, - ), - ) - ) - - state = Function.State.DEPLOYING - logger.info( - "Creating cloud function to run pipeline... This might take a few " - "minutes. Please do not exit the program at this point..." - ) - - start_time = time.time() - while state == Function.State.DEPLOYING: - response = get_cloud_functions_api( - credentials=credentials - ).get_function(request=GetFunctionRequest(name=function_full_name)) - state = response.state - logger.info("Still creating... sleeping for 5 seconds...") - time.sleep(5) - - if time.time() - start_time > timeout: - raise TimeoutError("Timed out waiting for function to deploy!") - - if state != Function.State.ACTIVE: - error_messages = ", ".join( - [msg.message for msg in response.state_messages] - ) - raise RuntimeError( - f"Scheduling failed with the following messages: {error_messages}" - ) - - logger.info(f"Done! Function available at {response.service_config.uri}") - return str(response.service_config.uri) diff --git a/src/zenml/integrations/gcp/google_cloud_scheduler.py b/src/zenml/integrations/gcp/google_cloud_scheduler.py deleted file mode 100644 index cb0352a2bc..0000000000 --- a/src/zenml/integrations/gcp/google_cloud_scheduler.py +++ /dev/null @@ -1,83 +0,0 @@ -# Copyright (c) ZenML GmbH 2022. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing -# permissions and limitations under the License. -"""Utils for the Google Cloud Scheduler API.""" - -import json -import logging -from typing import TYPE_CHECKING, Dict, Optional, Union - -from google.cloud import scheduler -from google.cloud.scheduler_v1.types import ( - CreateJobRequest, - HttpMethod, - HttpTarget, - Job, - OidcToken, -) - -if TYPE_CHECKING: - from google.auth.credentials import Credentials - - -def create_scheduler_job( - project: str, - region: str, - http_uri: str, - service_account_email: str, - body: Dict[str, Union[Dict[str, str], bool, str, None]], - credentials: Optional["Credentials"] = None, - schedule: str = "* * * * *", - time_zone: str = "Etc/UTC", -) -> None: - """Creates a Google Cloud Scheduler job. - - Job periodically sends POST request to the specified HTTP URI on a schedule. - - Args: - project: GCP project ID. - region: GCP region. - http_uri: HTTP URI of the cloud function to call. 
- service_account_email: Service account email to use to authenticate to - the Google Cloud Function through an OIDC token. - body: The body of values to send to the cloud function in the POST call. - schedule: Cron expression of the schedule. Defaults to "* * * * *". - time_zone: Time zone of the schedule. Defaults to "Etc/UTC". - credentials: Credentials to use for GCP services. - """ - # Create a client. - client = scheduler.CloudSchedulerClient(credentials=credentials) - - # Construct the fully qualified location path. - parent = f"projects/{project}/locations/{region}" - - # Use the client to send the job creation request. - job = client.create_job( - request=CreateJobRequest( - parent=parent, - job=Job( - http_target=HttpTarget( - uri=http_uri, - body=json.dumps(body).encode(), - http_method=HttpMethod.POST, - oidc_token=OidcToken( - service_account_email=service_account_email - ), - ), - schedule=schedule, - time_zone=time_zone, - ), - ) - ) - - logging.debug(f"Created scheduler job. Response: {job}") diff --git a/src/zenml/integrations/gcp/orchestrators/vertex_orchestrator.py b/src/zenml/integrations/gcp/orchestrators/vertex_orchestrator.py index 0bb511f1c2..f68e68221f 100644 --- a/src/zenml/integrations/gcp/orchestrators/vertex_orchestrator.py +++ b/src/zenml/integrations/gcp/orchestrators/vertex_orchestrator.py @@ -54,25 +54,9 @@ VertexOrchestratorConfig, VertexOrchestratorSettings, ) -from zenml.integrations.gcp.google_cloud_function import create_cloud_function -from zenml.integrations.gcp.google_cloud_scheduler import create_scheduler_job from zenml.integrations.gcp.google_credentials_mixin import ( GoogleCredentialsMixin, ) -from zenml.integrations.gcp.orchestrators import vertex_scheduler -from zenml.integrations.gcp.orchestrators.vertex_scheduler.main import ( - ENABLE_CACHING, - ENCRYPTION_SPEC_KEY_NAME, - JOB_ID, - LABELS, - LOCATION, - NETWORK, - PARAMETER_VALUES, - PIPELINE_ROOT, - PROJECT, - TEMPLATE_PATH, - WORKLOAD_SERVICE_ACCOUNT, -) from zenml.integrations.kubeflow.utils import apply_pod_settings from zenml.io import fileio from zenml.logger import get_logger @@ -249,14 +233,12 @@ def prepare_pipeline_deployment( if deployment.schedule: if ( deployment.schedule.catchup - or deployment.schedule.start_time - or deployment.schedule.end_time or deployment.schedule.interval_second ): logger.warning( "Vertex orchestrator only uses schedules with the " - "`cron_expression` property. All other properties " - "are ignored." + "`cron_expression` property, with optional `start_time` and/or `end_time`. " + "All other properties are ignored." ) if deployment.schedule.cron_expression is None: raise ValueError( @@ -478,143 +460,15 @@ def _construct_kfp_pipeline() -> None: VertexOrchestratorSettings, self.get_settings(deployment) ) - if deployment.schedule: - logger.info( - "Scheduling job using Google Cloud Scheduler and Google " - "Cloud Functions..." - ) - self._upload_and_schedule_pipeline( - pipeline_name=deployment.pipeline_configuration.name, - run_name=orchestrator_run_name, - stack=stack, - schedule=deployment.schedule, - pipeline_file_path=pipeline_file_path, - settings=settings, - ) - - else: - logger.info("No schedule detected. Creating one-off vertex job...") - # Using the Google Cloud AIPlatform client, upload and execute the - # pipeline - # on the Vertex AI Pipelines service. 
- self._upload_and_run_pipeline( - pipeline_name=deployment.pipeline_configuration.name, - pipeline_file_path=pipeline_file_path, - run_name=orchestrator_run_name, - settings=settings, - ) - - def _upload_and_schedule_pipeline( - self, - pipeline_name: str, - run_name: str, - stack: "Stack", - schedule: "ScheduleResponse", - pipeline_file_path: str, - settings: VertexOrchestratorSettings, - ) -> None: - """Uploads and schedules pipeline on GCP. - - Args: - pipeline_name: Name of the pipeline. - run_name: Orchestrator run name. - stack: The stack the pipeline will run on. - schedule: The schedule the pipeline will run on. - pipeline_file_path: Path of the JSON file containing the compiled - Kubeflow pipeline (compiled with Kubeflow SDK v2). - settings: Pipeline level settings for this orchestrator. - - Raises: - ValueError: If the attribute `pipeline_root` is not set, and it - can be not generated using the path of the artifact store in the - stack because it is not a - `zenml.integrations.gcp.artifact_store.GCPArtifactStore`. Also - gets raised if attempting to schedule pipeline run without using - the `zenml.integrations.gcp.artifact_store.GCPArtifactStore`. - """ - # First, do some validation - artifact_store = stack.artifact_store - if artifact_store.flavor != GCP_ARTIFACT_STORE_FLAVOR: - raise ValueError( - "Currently, the Vertex AI orchestrator only supports " - "scheduled runs in combination with an artifact store of " - f"flavor: {GCP_ARTIFACT_STORE_FLAVOR}. The current stacks " - f"artifact store is of flavor: {artifact_store.flavor}. " - "Please update your stack accordingly." - ) - - # Get the credentials that would be used to create resources. - credentials, project_id = self._get_authentication() - - scheduler_service_account_email: Optional[str] = None - if self.config.scheduler_service_account: - scheduler_service_account_email = ( - self.config.scheduler_service_account - ) - elif hasattr(credentials, "signer_email"): - scheduler_service_account_email = credentials.signer_email - else: - scheduler_service_account_email = ( - self.config.function_service_account - or self.config.workload_service_account - ) - - if not scheduler_service_account_email: - raise ValueError( - "A GCP service account is required to schedule a pipeline run. " - "The credentials used to authenticate with GCP do not have a " - "service account associated with them and a service account " - "was not configured in the `scheduler_service_account` field " - "of the orchestrator config. Please update your orchestrator " - "configuration or credentials accordingly." 
- ) - - # Copy over the scheduled pipeline to the artifact store - artifact_store_base_uri = f"{artifact_store.path.rstrip('/')}/vertex_scheduled_pipelines/{pipeline_name}/{run_name}" - artifact_store_pipeline_uri = ( - f"{artifact_store_base_uri}/vertex_pipeline.json" - ) - fileio.copy(pipeline_file_path, artifact_store_pipeline_uri) - logger.info( - "The scheduled pipeline representation has been " - "automatically copied to this path of the `GCPArtifactStore`: " - f"{artifact_store_pipeline_uri}", - ) - - # Create cloud function - function_uri = create_cloud_function( - directory_path=vertex_scheduler.__path__[0], # fixed path - upload_path=f"{artifact_store_base_uri}/code.zip", - project=project_id, - location=self.config.location, - function_name=run_name, - credentials=credentials, - function_service_account_email=self.config.function_service_account, - ) - - # Create the scheduler job - body = { - TEMPLATE_PATH: artifact_store_pipeline_uri, - JOB_ID: _clean_pipeline_name(pipeline_name), - PIPELINE_ROOT: self._pipeline_root, - PARAMETER_VALUES: None, - ENABLE_CACHING: False, - ENCRYPTION_SPEC_KEY_NAME: self.config.encryption_spec_key_name, - LABELS: settings.labels, - PROJECT: project_id, - LOCATION: self.config.location, - WORKLOAD_SERVICE_ACCOUNT: self.config.workload_service_account, - NETWORK: self.config.network, - } - - create_scheduler_job( - project=project_id, - region=self.config.location, - http_uri=function_uri, - body=body, - schedule=str(schedule.cron_expression), - credentials=credentials, - service_account_email=scheduler_service_account_email, + # Using the Google Cloud AIPlatform client, upload and execute the + # pipeline + # on the Vertex AI Pipelines service. + self._upload_and_run_pipeline( + pipeline_name=deployment.pipeline_configuration.name, + pipeline_file_path=pipeline_file_path, + run_name=orchestrator_run_name, + settings=settings, + schedule=deployment.schedule, ) def _upload_and_run_pipeline( @@ -623,6 +477,7 @@ def _upload_and_run_pipeline( pipeline_file_path: str, run_name: str, settings: VertexOrchestratorSettings, + schedule: Optional["ScheduleResponse"] = None, ) -> None: """Uploads and run the pipeline on the Vertex AI Pipelines service. @@ -632,6 +487,7 @@ def _upload_and_run_pipeline( Kubeflow pipeline (compiled with Kubeflow SDK v2). run_name: Orchestrator run name. settings: Pipeline level settings for this orchestrator. + schedule: The schedule the pipeline will run on. """ # We have to replace the hyphens in the run name with underscores # and lower case the string, because the Vertex AI Pipelines service @@ -658,48 +514,65 @@ def _upload_and_run_pipeline( location=self.config.location, ) - logger.info( - "Submitting pipeline job with job_id `%s` to Vertex AI Pipelines " - "service.", - job_id, - ) + if self.config.workload_service_account: + logger.info( + "The Vertex AI Pipelines job workload will be executed " + "using the `%s` " + "service account.", + self.config.workload_service_account, + ) + if self.config.network: + logger.info( + "The Vertex AI Pipelines job will be peered with the `%s` " + "network.", + self.config.network, + ) - # Submit the job to Vertex AI Pipelines service. try: - if self.config.workload_service_account: + if schedule: logger.info( - "The Vertex AI Pipelines job workload will be executed " - "using the `%s` " - "service account.", - self.config.workload_service_account, + "Scheduling job using native Vertex AI Pipelines scheduling..." 
+ ) + run.create_schedule( + display_name=schedule.name, + cron=schedule.cron_expression, + start_time=schedule.utc_start_time, + end_time=schedule.utc_end_time, + service_account=self.config.workload_service_account, + network=self.config.network, ) - if self.config.network: + else: logger.info( - "The Vertex AI Pipelines job will be peered with the `%s` " - "network.", - self.config.network, + "No schedule detected. Creating one-off Vertex job..." + ) + logger.info( + "Submitting pipeline job with job_id `%s` to Vertex AI Pipelines " + "service.", + job_id, ) - run.submit( - service_account=self.config.workload_service_account, - network=self.config.network, - ) - logger.info( - "View the Vertex AI Pipelines job at %s", run._dashboard_uri() - ) + # Submit the job to Vertex AI Pipelines service. - if settings.synchronous: + run.submit( + service_account=self.config.workload_service_account, + network=self.config.network, + ) logger.info( - "Waiting for the Vertex AI Pipelines job to finish..." + "View the Vertex AI Pipelines job at %s", + run._dashboard_uri(), ) - run.wait() + + if settings.synchronous: + logger.info( + "Waiting for the Vertex AI Pipelines job to finish..." + ) + run.wait() except google_exceptions.ClientError as e: logger.warning( "Failed to create the Vertex AI Pipelines job: %s", e ) - except RuntimeError as e: logger.error( "The Vertex AI Pipelines job execution has failed: %s", e diff --git a/src/zenml/integrations/gcp/orchestrators/vertex_scheduler/__init__.py b/src/zenml/integrations/gcp/orchestrators/vertex_scheduler/__init__.py deleted file mode 100644 index b48ae62f55..0000000000 --- a/src/zenml/integrations/gcp/orchestrators/vertex_scheduler/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) ZenML GmbH 2022. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing -# permissions and limitations under the License. -"""Loading the vertex scheduler package.""" diff --git a/src/zenml/integrations/gcp/orchestrators/vertex_scheduler/main.py b/src/zenml/integrations/gcp/orchestrators/vertex_scheduler/main.py deleted file mode 100644 index bf4e0c749e..0000000000 --- a/src/zenml/integrations/gcp/orchestrators/vertex_scheduler/main.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) ZenML GmbH 2022. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing -# permissions and limitations under the License. 
-"""Entrypoint for the scheduled vertex job.""" -import json -import logging -import random -from typing import TYPE_CHECKING - -from google.cloud import aiplatform - -if TYPE_CHECKING: - from flask import Request - - -# Constants for the scheduler - -TEMPLATE_PATH = "template_path" -JOB_ID = "job_id" -PIPELINE_ROOT = "pipeline_root" -PARAMETER_VALUES = "parameter_values" -ENABLE_CACHING = "enable_caching" -ENCRYPTION_SPEC_KEY_NAME = "encryption_spec_key_name" -LABELS = "labels" -PROJECT = "project" -LOCATION = "location" -WORKLOAD_SERVICE_ACCOUNT = "workload_service_account" -NETWORK = "network" - - -def trigger_vertex_job(request: "Request") -> str: - """Processes the incoming HTTP request. - - Args: - request: HTTP request object. - - Returns: - The response text or any set of values that can be turned into a Response. - """ - # decode http request payload and translate into JSON object - request_str = request.data.decode("utf-8") - request_json = json.loads(request_str) - - display_name = f"{request_json[JOB_ID]}-scheduled-{random.Random().getrandbits(32):08x}" - - run = aiplatform.PipelineJob( - display_name=display_name, - template_path=request_json[TEMPLATE_PATH], - job_id=display_name, - pipeline_root=request_json[PIPELINE_ROOT], - parameter_values=request_json[PARAMETER_VALUES], - enable_caching=request_json[ENABLE_CACHING], - encryption_spec_key_name=request_json[ENCRYPTION_SPEC_KEY_NAME], - labels=request_json[LABELS], - project=request_json[PROJECT], - location=request_json[LOCATION], - ) - - workload_service_account = request_json[WORKLOAD_SERVICE_ACCOUNT] - network = request_json[NETWORK] - - if workload_service_account: - logging.info( - "The Vertex AI Pipelines job workload will be executed " - "using the `%s` " - "service account.", - workload_service_account, - ) - - if network: - logging.info( - "The Vertex AI Pipelines job will be peered with the `%s` " - "network.", - network, - ) - - run.submit( - service_account=workload_service_account, - network=network, - ) - return f"{display_name} submitted!" 
diff --git a/src/zenml/integrations/gcp/orchestrators/vertex_scheduler/requirements.txt b/src/zenml/integrations/gcp/orchestrators/vertex_scheduler/requirements.txt
deleted file mode 100644
index d9f3c83c7b..0000000000
--- a/src/zenml/integrations/gcp/orchestrators/vertex_scheduler/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-google-api-python-client>=1.7.8,<2
-google-cloud-aiplatform[pipelines]
\ No newline at end of file

From c1abd870c0f64a4309dd8eed3c56fd66259dbe5a Mon Sep 17 00:00:00 2001
From: Christian Versloot
Date: Tue, 6 Feb 2024 09:24:01 +0100
Subject: [PATCH 19/27] HyperAI integration: orchestrator and service connector (#2372)

* Add init for HyperAI integration
* WIP: HyperAI service connector
* WIP
* WIP: HyperAI Service Connector
* WIP: HyperAI Orchestrator
* Replace Docker compose write with temporary file and SCP
* Variable assignment error
* Set dependency
* Set basic values of the HyperAI settings and config
* Add config property
* Allow mounts to be made
* Remove newline
* Finish (untested) orchestrator
* Import HyperAI integration
* Import HyperAI service connector in service connector registry
* Rename resource type
* Rename auth method
* Force key to be base64
* Fixes to service connector
* Identify instance by name and IP address
* Strip IP address Python
* Strip IP address Python
* Return paramiko client
* WIP
* Mimic sagemaker integration
* Fixes to make HyperAI orchestrator visible
* Fixes to make orchestrator work
* Temp change default local ip for testing
* Environment fix
* Use upstream steps to determine dependencies
* Add support for scheduled pipelines
* Polish schedules
* Add configuration support for multiple Paramiko key types
* Add Base64 instructions
* Rename various vars
* Add instructions about possible cron
* Some docstring edits
* Add setting for CR autologin
* Add rudimentary Docker login
* Move value
* Add docstring
* Remove unused def
* Extract Paramiko key type given service connector configuration
* Add better warnings
* Check for None differently
* Automatic Docker login if configured
* Add HyperAI orchestrator flavor to docs
* Basic docs for HyperAI orchestrator
* Add HyperAI service connector to auth management docs
* Add HyperAI service connector to docs
* Set autologin to False by default
* Add test similar to Airflow orchestrator
* Formatting
* Revert changes needed to run successfully locally
* Add mount path validation
* Improve error handling and formatting
* Format mount paths differently
* Upgrade azureml-core to 1.54.0.post1
* Fix docstring
* Update src/zenml/integrations/hyperai/service_connectors/hyperai_service_connector.py

Co-authored-by: Michael Schuster

* Rename def into _validate_mount_path
* Update config docstring to default to False
* Move Settings, Config and Flavor to flavor folder
* Remove type from docstring
* Remove type from docstring
* Remove type check covered by pydantic
* Select container registry more efficiently
* Remove redundant type conversion
* Move Paramiko client creation into helper method
* Reformatting
* Fix imports
* Temp changes for local testing
* Fix imports
* Revert "Temp changes for local testing"

This reverts commit 76fdb295005884d581c1e4cc1769a05c1ae6cdf5.
* Rename HYPERAI_RESOURCE_TYPE into hyperai-instance * Rename ip_address into hostname * Update src/zenml/integrations/hyperai/service_connectors/hyperai_service_connector.py Co-authored-by: Stefan Nica * Raise AuthorizationException if client cannot be created * Remove RuntimeError in two places because it will never arrive in that state anymore * Remove try/catch statement * Let exception fall through if applicable * Remove raises * Add warning hint about long-lived credentials * Renames in docs based on changes * Add missing io import * Formatting * Add automatic_cleanup_pipeline_files to HyperAIOrchestratorConfig * Remove redundant variable assignment * Clean only if users configure auto cleaning * Update docs * Work in progress: multi IP service connector * Resources * Append hostname instead * Omit assigning value * Rename config value * Ensure that hostname is passed to Paramiko client * Raise NotImplementedError instead of pass value * Formatting * Changes to _verify * Reflect changes in service connector docs * Fix connector value validation to allow arrays to be used with the CLI * Reflect changes in orchestrator docs * Fix connector verification to allow the multi-instance case * Ensure that pipelines can run when scheduled by setting run ID dynamically * Reformatting * Add information about scheduled pipelines to docs * Use service connector username to create Compose files on instance * Add GPU reservation if configured that way * Formatting * Add instruction * Add prerequisites for HyperAI instance * Formatting and docstrings * Fixed remaining linter errors * Applied review suggestions * Add paramiko to API docs mocks * HyperAI orchestrator config tests; make additional assertions available and fix is_remote * Remove GPU-based Dockerfile * Ensure that shell commands are escaped when used * Provide password to stdin differently * Escape case where file cannot be written to HyperAI instance * Escape inputs differently * Use network mode host to avoid non-overlapping IPv4 network pool error * Disable security check for paramiko auto-add-policy * Changes to escaping * Silenced remaining security issues and fixed remaining linter errors --------- Co-authored-by: Michael Schuster Co-authored-by: Stefan Nica Co-authored-by: Alex Strick van Linschoten --- .../auth-management/auth-management.md | 5 + .../hyperai-service-connector.md | 57 +++ .../component-guide/orchestrators/hyperai.md | 90 ++++ .../orchestrators/orchestrators.md | 1 + docs/mocked_libs.json | 1 + pyproject.toml | 2 + src/zenml/cli/utils.py | 8 +- src/zenml/integrations/__init__.py | 1 + src/zenml/integrations/azure/__init__.py | 2 +- src/zenml/integrations/constants.py | 1 + src/zenml/integrations/hyperai/__init__.py | 53 +++ .../integrations/hyperai/flavors/__init__.py | 20 + .../flavors/hyperai_orchestrator_flavor.py | 161 +++++++ .../hyperai/orchestrators/__init__.py | 21 + .../orchestrators/hyperai_orchestrator.py | 418 ++++++++++++++++++ .../hyperai/service_connectors/__init__.py | 20 + .../hyperai_service_connector.py | 373 ++++++++++++++++ src/zenml/models/v2/core/service_connector.py | 6 + .../service_connector_registry.py | 7 + .../integrations/hyperai/__init__.py | 13 + .../hyperai/orchestrators/__init__.py | 13 + .../test_hyperai_orchestrator.py | 89 ++++ 22 files changed, 1359 insertions(+), 3 deletions(-) create mode 100644 docs/book/stacks-and-components/auth-management/hyperai-service-connector.md create mode 100644 docs/book/stacks-and-components/component-guide/orchestrators/hyperai.md create mode 
100644 src/zenml/integrations/hyperai/__init__.py
 create mode 100644 src/zenml/integrations/hyperai/flavors/__init__.py
 create mode 100644 src/zenml/integrations/hyperai/flavors/hyperai_orchestrator_flavor.py
 create mode 100644 src/zenml/integrations/hyperai/orchestrators/__init__.py
 create mode 100644 src/zenml/integrations/hyperai/orchestrators/hyperai_orchestrator.py
 create mode 100644 src/zenml/integrations/hyperai/service_connectors/__init__.py
 create mode 100644 src/zenml/integrations/hyperai/service_connectors/hyperai_service_connector.py
 create mode 100644 tests/integration/integrations/hyperai/__init__.py
 create mode 100644 tests/integration/integrations/hyperai/orchestrators/__init__.py
 create mode 100644 tests/integration/integrations/hyperai/orchestrators/test_hyperai_orchestrator.py

diff --git a/docs/book/stacks-and-components/auth-management/auth-management.md b/docs/book/stacks-and-components/auth-management/auth-management.md
index b312970b7a..f590278a44 100644
--- a/docs/book/stacks-and-components/auth-management/auth-management.md
+++ b/docs/book/stacks-and-components/auth-management/auth-management.md
@@ -101,6 +101,11 @@ zenml service-connector list-types
 ┃                           │                   │ 🌀 kubernetes-cluster │ service-account │       │        ┃
 ┃                           │                   │ 🐳 docker-registry    │ oauth2-token    │       │        ┃
 ┃                           │                   │                       │ impersonation   │       │        ┃
+┠───────────────────────────┼───────────────────┼───────────────────────┼─────────────────┼───────┼────────┨
+┃ HyperAI Service Connector │ 🤖 hyperai        │ 🤖 hyperai-instance   │ rsa-key         │ ✅    │ ✅     ┃
+┃                           │                   │                       │ dsa-key         │       │        ┃
+┃                           │                   │                       │ ecdsa-key       │       │        ┃
+┃                           │                   │                       │ ed25519-key     │       │        ┃
 ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━┷━━━━━━━┷━━━━━━━━┛
 ```
 {% endcode %}
diff --git a/docs/book/stacks-and-components/auth-management/hyperai-service-connector.md b/docs/book/stacks-and-components/auth-management/hyperai-service-connector.md
new file mode 100644
index 0000000000..56458c1b64
--- /dev/null
+++ b/docs/book/stacks-and-components/auth-management/hyperai-service-connector.md
@@ -0,0 +1,57 @@
+---
+description: Configuring HyperAI Connectors to connect ZenML to HyperAI instances.
+---
+
+# HyperAI Service Connector
+
+The ZenML HyperAI Service Connector allows authenticating with a HyperAI instance for deployment of pipeline runs. This connector provides pre-authenticated Paramiko SSH clients to Stack Components that are linked to it.
+
+```
+$ zenml service-connector list-types --type hyperai
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━┯━━━━━━━┯━━━━━━━━┓
+┃ NAME                      │ TYPE       │ RESOURCE TYPES      │ AUTH METHODS │ LOCAL │ REMOTE ┃
+┠───────────────────────────┼────────────┼─────────────────────┼──────────────┼───────┼────────┨
+┃ HyperAI Service Connector │ 🤖 hyperai │ 🤖 hyperai-instance │ rsa-key      │ ✅    │ ✅     ┃
+┃                           │            │                     │ dsa-key      │       │        ┃
+┃                           │            │                     │ ecdsa-key    │       │        ┃
+┃                           │            │                     │ ed25519-key  │       │        ┃
+┗━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━┷━━━━━━━┷━━━━━━━━┛
+```
+
+## Prerequisites
+The HyperAI Service Connector is part of the HyperAI integration. It is necessary to install the integration in order to use this Service Connector:
+
+* `zenml integration install hyperai` installs the HyperAI integration
+
+## Resource Types
+The HyperAI Service Connector supports HyperAI instances.
+
+## Authentication Methods
+ZenML creates an SSH connection to the HyperAI instance in the background when using this Service Connector. It then provides these connections to stack components requiring them, such as the HyperAI Orchestrator. Multiple authentication methods are supported:
+
+1. RSA key based authentication.
+2. DSA (DSS) key based authentication.
+3. ECDSA key based authentication.
+4. ED25519 key based authentication.
+
+{% hint style="warning" %}
+SSH private keys configured in the connector will be distributed to all clients that use them to run pipelines with the HyperAI orchestrator. SSH keys are long-lived credentials that give unrestricted access to HyperAI instances.
+{% endhint %}
+
+When configuring the Service Connector, it is required to provide at least one hostname via `hostnames` and the `username` with which to log in. Optionally, it is possible to provide an `ssh_passphrase` if applicable. This way, it is possible to use the HyperAI service connector in multiple ways:
+
+1. Create one service connector per HyperAI instance with different SSH keys.
+2. Configure a reused SSH key just once for multiple HyperAI instances, then select the individual instance when creating the HyperAI orchestrator component.
+
+## Auto-configuration
+
+{% hint style="info" %}
+This Service Connector does not support auto-discovery and extraction of authentication credentials from HyperAI instances. If this feature is useful to you or your organization, please let us know by messaging us in [Slack](https://zenml.io/slack-invite) or [creating an issue on GitHub](https://github.com/zenml-io/zenml/issues).
+{% endhint %}
+
+## Stack Components use
+
+The HyperAI Service Connector can be used by the HyperAI Orchestrator to deploy pipeline runs to HyperAI instances.
+
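+## Example: preparing the Base64 SSH key
+
+Since every authentication method expects the private key as a Base64 encoded string, the snippet below sketches one way to produce the value for `base64_ssh_key`. The key path is an assumption; use the private key whose public counterpart is registered on your HyperAI instances:
+
+```python
+# Minimal sketch: Base64 encode an on-disk private key for the connector.
+import base64
+from pathlib import Path
+
+key_bytes = Path("~/.ssh/hyperai_rsa").expanduser().read_bytes()  # placeholder path
+print(base64.b64encode(key_bytes).decode("ascii"))
+```
+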
diff --git a/docs/book/stacks-and-components/component-guide/orchestrators/hyperai.md b/docs/book/stacks-and-components/component-guide/orchestrators/hyperai.md new file mode 100644 index 0000000000..d54dbdc41c --- /dev/null +++ b/docs/book/stacks-and-components/component-guide/orchestrators/hyperai.md @@ -0,0 +1,90 @@ +--- +description: Orchestrating your pipelines to run on HyperAI.ai instances. +--- + +# HyperAI orchestrator +[HyperAI](https://www.hyperai.ai) is a cutting-edge cloud compute platform designed to make AI accessible for everyone. The HyperAI orchestrator is an [orchestrator](orchestrators.md) flavor that allows you to easily deploy your pipelines on HyperAI instances. + +{% hint style="warning" %} +This component is only meant to be used within the context of +a [remote ZenML deployment scenario](/docs/book/deploying-zenml/zenml-self-hosted/zenml-self-hosted.md). +Usage with a local ZenML deployment may lead to unexpected behavior! +{% endhint %} + +### When to use it + +You should use the HyperAI orchestrator if: + +* you're looking for a managed solution for running your pipelines. +* you're a HyperAI customer. + +### Prerequisites +You will need to do the following to start using the HyperAI orchestrator: + +* Have a running HyperAI instance. It must be accessible from the internet (or at least from the IP addresses of your ZenML users) and allow SSH key based access (passwords are not supported). +* Ensure that a recent version of Docker is installed. This version must include Docker Compose, meaning that the command `docker compose` works. +* Ensure that the appropriate [NVIDIA Driver](https://www.nvidia.com/en-us/drivers/unix/) is installed on the HyperAI instance (if not already installed by the HyperAI team). +* Ensure that the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) is installed and configured on the HyperAI instance. + +Note that it is possible to omit installing the NVIDIA Driver and NVIDIA Container Toolkit. However, you will then be unable to use the GPU from within your ZenML pipeline. Additionally, you will then need to disable GPU access within the container when configuring the Orchestrator component, or the pipeline will not start correctly. + +## How it works +The HyperAI orchestrator works with Docker Compose, which can be used to construct machine learning pipelines. +Under the hood, it creates a Docker Compose file which it then deploys and executes on the configured HyperAI instance. +For each ZenML pipeline step, it creates a service in this file. It uses the `service_completed_successfully` condition +to ensure that pipeline steps will only run if their connected upstream steps have successfully finished. + +If configured for it, the HyperAI orchestrator will connect the HyperAI instance to the stack's container registry to ensure +a smooth transfer of Docker images. + +### Scheduled pipelines + +[Scheduled pipelines](../../../user-guide/advanced-guide/pipelining-features/schedule-pipeline-runs.md) are supported by the HyperAI orchestrator. Currently, only cron expressions are supported via `cron_expression`. When pipeline runs are scheduled, they are added as a crontab entry +on the HyperAI instance. + +### How to deploy it +To use the HyperAI orchestrator, you must configure a HyperAI Service Connector in ZenML and link it to the HyperAI orchestrator +component. The service connector contains credentials with which ZenML connects to the HyperAI instance. 
+
+Additionally, the HyperAI orchestrator must be used in a stack that contains a container registry and an image builder.
+
+### How to use it
+
+To use the HyperAI orchestrator, we must configure a HyperAI Service Connector first using one of its supported authentication
+methods. For example, for authentication with an RSA-based key, create the service connector as follows:
+
+```shell
+zenml service-connector register <CONNECTOR_NAME> --type=hyperai --auth-method=rsa-key --base64_ssh_key=<BASE64_SSH_KEY> --hostnames=<HOSTNAME_1>,<HOSTNAME_2>,..,<HOSTNAME_N> --username=<INSTANCE_USERNAME>
+```
+
+Hostnames are either DNS resolvable names or IP addresses.
+
+For example, if you have two servers - one at `1.2.3.4` and another at `4.3.2.1`, you could provide them as `--hostnames=1.2.3.4,4.3.2.1`.
+
+Optionally, it is possible to provide a passphrase for the key (`--ssh_passphrase`).
+
+After registering the service connector, we can register the orchestrator and use it in our active stack:
+
+```shell
+zenml orchestrator register <ORCHESTRATOR_NAME> --flavor=hyperai
+
+# Register and activate a stack with the new orchestrator
+zenml stack register <STACK_NAME> -o <ORCHESTRATOR_NAME> ... --set
+```
+
+You can now run any ZenML pipeline using the HyperAI orchestrator:
+
+```shell
+python file_that_runs_a_zenml_pipeline.py
+```
+
+#### Enabling CUDA for GPU-backed hardware
+
+Note that if you wish to use this orchestrator to run steps on a GPU, you will need to
+follow [the instructions on this page](/docs/book/user-guide/advanced-guide/infrastructure-management/scale-compute-to-the-cloud.md) to ensure
+that it works. It requires adding some extra settings customization and is essential to enable CUDA for the GPU to
+give its full acceleration.
+
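+#### Mounting directories into step containers
+
+The `mounts_from_to` setting of this orchestrator maps paths on the HyperAI instance to paths inside the step containers. The sketch below shows one way to pass it via pipeline settings; the two paths are placeholder assumptions, and the `"orchestrator.hyperai"` settings key follows ZenML's usual `orchestrator.<flavor>` convention:
+
+```python
+from zenml import pipeline
+from zenml.integrations.hyperai.flavors.hyperai_orchestrator_flavor import (
+    HyperAIOrchestratorSettings,
+)
+
+# Mount /home/ubuntu/data on the HyperAI instance to /data in each step
+# container (placeholder paths).
+hyperai_settings = HyperAIOrchestratorSettings(
+    mounts_from_to={"/home/ubuntu/data": "/data"}
+)
+
+
+@pipeline(settings={"orchestrator.hyperai": hyperai_settings})
+def my_pipeline() -> None:
+    ...
+```
+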
+ diff --git a/docs/book/stacks-and-components/component-guide/orchestrators/orchestrators.md b/docs/book/stacks-and-components/component-guide/orchestrators/orchestrators.md index 9bbca83fcf..139b1f83b3 100644 --- a/docs/book/stacks-and-components/component-guide/orchestrators/orchestrators.md +++ b/docs/book/stacks-and-components/component-guide/orchestrators/orchestrators.md @@ -38,6 +38,7 @@ Additional orchestrators are provided by integrations: | [SkypilotAWSOrchestrator](skypilot-vm.md) | `vm_aws` | `skypilot[aws]` | Runs your pipelines in AWS VMs using SkyPilot | | [SkypilotGCPOrchestrator](skypilot-vm.md) | `vm_gcp` | `skypilot[gcp]` | Runs your pipelines in GCP VMs using SkyPilot | | [SkypilotAzureOrchestrator](skypilot-vm.md) | `vm_azure` | `skypilot[azure]` | Runs your pipelines in Azure VMs using SkyPilot | +| [HyperAIOrchestrator](hyperai.md) | `hyperai` | `hyperai` | Runs your pipeline in HyperAI.ai instances. | [Custom Implementation](custom.md) | _custom_ | | Extend the orchestrator abstraction and provide your own implementation | If you would like to see the available flavors of orchestrators, you can use the command: diff --git a/docs/mocked_libs.json b/docs/mocked_libs.json index 07bb7c5042..0bd5108ae5 100644 --- a/docs/mocked_libs.json +++ b/docs/mocked_libs.json @@ -144,6 +144,7 @@ "neptune", "neuralprophet", "openai", + "paramiko", "polars", "pyarrow", "pyarrow.parquet", diff --git a/pyproject.toml b/pyproject.toml index 96e41dabfb..0b7d39f1f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,6 +152,7 @@ types-certifi = { version = "^2021.10.8.0", optional = true } types-croniter = { version = "^1.0.2", optional = true } types-futures = { version = "^3.3.1", optional = true } types-Markdown = { version = "^3.3.6", optional = true } +types-paramiko = { version = ">=3.4.0", optional = true } types-Pillow = { version = "^9.2.1", optional = true } types-protobuf = { version = "^3.18.0", optional = true } types-PyMySQL = { version = "^1.0.4", optional = true } @@ -232,6 +233,7 @@ dev = [ "types-croniter", "types-futures", "types-Markdown", + "types-paramiko", "types-Pillow", "types-protobuf", "types-PyMySQL", diff --git a/src/zenml/cli/utils.py b/src/zenml/cli/utils.py index 0049bc3029..73fa3fb701 100644 --- a/src/zenml/cli/utils.py +++ b/src/zenml/cli/utils.py @@ -915,12 +915,14 @@ def prompt_configuration( config_dict = {} for attr_name, attr_schema in config_schema.get("properties", {}).items(): title = attr_schema.get("title", attr_name) - attr_type = attr_schema.get("type", "string") + attr_type_name = attr_type = attr_schema.get("type", "string") + if attr_type == "array": + attr_type_name = "list (CSV or JSON)" title = f"[{attr_name}] {title}" required = attr_name in config_schema.get("required", []) hidden = attr_schema.get("format", "") == "password" subtitles: List[str] = [] - subtitles.append(attr_type) + subtitles.append(attr_type_name) if hidden: subtitles.append("secret") if required: @@ -938,6 +940,8 @@ def prompt_configuration( if hidden and not show_secrets: title += " is currently set to: [HIDDEN]" else: + if attr_type == "array": + existing_value = json.dumps(existing_value) title += f" is currently set to: '{existing_value}'" else: title += " is not currently set" diff --git a/src/zenml/integrations/__init__.py b/src/zenml/integrations/__init__.py index 247282a1b0..786e4d86f7 100644 --- a/src/zenml/integrations/__init__.py +++ b/src/zenml/integrations/__init__.py @@ -35,6 +35,7 @@ GreatExpectationsIntegration, ) from 
zenml.integrations.huggingface import HuggingfaceIntegration # noqa +from zenml.integrations.hyperai import HyperAIIntegration # noqa from zenml.integrations.kaniko import KanikoIntegration # noqa from zenml.integrations.kserve import KServeIntegration # noqa from zenml.integrations.kubeflow import KubeflowIntegration # noqa diff --git a/src/zenml/integrations/azure/__init__.py b/src/zenml/integrations/azure/__init__.py index 4d1ceac545..1d7e2438ab 100644 --- a/src/zenml/integrations/azure/__init__.py +++ b/src/zenml/integrations/azure/__init__.py @@ -43,7 +43,7 @@ class AzureIntegration(Integration): "azure-keyvault-keys", "azure-keyvault-secrets", "azure-identity==1.10.0", - "azureml-core==1.48.0", + "azureml-core==1.54.0.post1", "azure-mgmt-containerservice>=20.0.0", "azure-storage-blob==12.17.0", # temporary fix for https://github.com/Azure/azure-sdk-for-python/issues/32056 "kubernetes", diff --git a/src/zenml/integrations/constants.py b/src/zenml/integrations/constants.py index 8d654bee0a..a833d08e2d 100644 --- a/src/zenml/integrations/constants.py +++ b/src/zenml/integrations/constants.py @@ -30,6 +30,7 @@ GRAPHVIZ = "graphviz" KSERVE = "kserve" HUGGINGFACE = "huggingface" +HYPERAI = "hyperai" GREAT_EXPECTATIONS = "great_expectations" KANIKO = "kaniko" KUBEFLOW = "kubeflow" diff --git a/src/zenml/integrations/hyperai/__init__.py b/src/zenml/integrations/hyperai/__init__.py new file mode 100644 index 0000000000..936c1f7863 --- /dev/null +++ b/src/zenml/integrations/hyperai/__init__.py @@ -0,0 +1,53 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Initialization of the HyperAI integration.""" +from typing import List, Type + +from zenml.integrations.constants import HYPERAI +from zenml.integrations.integration import Integration +from zenml.stack import Flavor + +# Service connector constants +HYPERAI_CONNECTOR_TYPE = "hyperai" +HYPERAI_RESOURCE_TYPE = "hyperai-instance" + + +class HyperAIIntegration(Integration): + """Definition of HyperAI integration for ZenML.""" + + NAME = HYPERAI + REQUIREMENTS = [ + "paramiko>=3.4.0", + ] + + @classmethod + def activate(cls) -> None: + """Activates the integration.""" + from zenml.integrations.hyperai import service_connectors # noqa + + @classmethod + def flavors(cls) -> List[Type[Flavor]]: + """Declare the stack component flavors for the HyperAI integration. + + Returns: + List of stack component flavors for this integration. + """ + from zenml.integrations.hyperai.flavors import ( + HyperAIOrchestratorFlavor + ) + + return [HyperAIOrchestratorFlavor] + + +HyperAIIntegration.check_installation() diff --git a/src/zenml/integrations/hyperai/flavors/__init__.py b/src/zenml/integrations/hyperai/flavors/__init__.py new file mode 100644 index 0000000000..861bff7be1 --- /dev/null +++ b/src/zenml/integrations/hyperai/flavors/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""HyperAI integration flavors."""

+from zenml.integrations.hyperai.flavors.hyperai_orchestrator_flavor import (
+    HyperAIOrchestratorFlavor,
+)
+
+__all__ = ["HyperAIOrchestratorFlavor"]
diff --git a/src/zenml/integrations/hyperai/flavors/hyperai_orchestrator_flavor.py b/src/zenml/integrations/hyperai/flavors/hyperai_orchestrator_flavor.py
new file mode 100644
index 0000000000..6acc64b839
--- /dev/null
+++ b/src/zenml/integrations/hyperai/flavors/hyperai_orchestrator_flavor.py
@@ -0,0 +1,161 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""HyperAI orchestrator flavor."""
+
+from typing import TYPE_CHECKING, Dict, Optional, Type
+
+from zenml.config.base_settings import BaseSettings
+from zenml.integrations.hyperai import HYPERAI_RESOURCE_TYPE
+from zenml.logger import get_logger
+from zenml.models import ServiceConnectorRequirements
+from zenml.orchestrators import (
+    BaseOrchestratorConfig,
+    BaseOrchestratorFlavor,
+)
+
+if TYPE_CHECKING:
+    from zenml.integrations.hyperai.orchestrators import HyperAIOrchestrator
+
+logger = get_logger(__name__)
+
+
+class HyperAIOrchestratorSettings(BaseSettings):
+    """HyperAI orchestrator settings.
+
+    Attributes:
+        mounts_from_to: A dictionary mapping from paths on the HyperAI instance
+            to paths within the Docker container. This allows users to mount
+            directories from the HyperAI instance into the Docker container that runs
+            on it.
+    """
+
+    mounts_from_to: Dict[str, str] = {}
+
+
+class HyperAIOrchestratorConfig(  # type: ignore[misc] # https://github.com/pydantic/pydantic/issues/4173
+    BaseOrchestratorConfig, HyperAIOrchestratorSettings
+):
+    """Configuration for the HyperAI orchestrator.
+
+    Attributes:
+        container_registry_autologin: If True, the orchestrator will attempt to
+            automatically log in to the container registry specified in the stack
+            configuration on the HyperAI instance. This is useful if the container
+            registry requires authentication and the HyperAI instance has not been
+            manually logged in to the container registry. Defaults to `False`.
+        automatic_cleanup_pipeline_files: If True, the orchestrator will
+            automatically clean up old pipeline files that are on the HyperAI
+            instance. Pipeline files will be cleaned up if they are 7 days old or
+            older. Defaults to `True`.
+ gpu_enabled_in_container: If True, the orchestrator will enable GPU + support in the Docker container that runs on the HyperAI instance. + Defaults to `True`. + + """ + + container_registry_autologin: bool = False + automatic_cleanup_pipeline_files: bool = True + gpu_enabled_in_container: bool = True + + @property + def is_remote(self) -> bool: + """Checks if this stack component is running remotely. + + This designation is used to determine if the stack component can be + used with a local ZenML database or if it requires a remote ZenML + server. + + Returns: + True if this config is for a remote component, False otherwise. + """ + return True + + +class HyperAIOrchestratorFlavor(BaseOrchestratorFlavor): + """Flavor for the HyperAI orchestrator.""" + + @property + def name(self) -> str: + """Name of the orchestrator flavor. + + Returns: + Name of the orchestrator flavor. + """ + return "hyperai" + + @property + def service_connector_requirements( + self, + ) -> Optional[ServiceConnectorRequirements]: + """Service connector resource requirements for service connectors. + + Specifies resource requirements that are used to filter the available + service connector types that are compatible with this flavor. + + Returns: + Requirements for compatible service connectors, if a service + connector is required for this flavor. + """ + return ServiceConnectorRequirements( + resource_type=HYPERAI_RESOURCE_TYPE + ) + + @property + def docs_url(self) -> Optional[str]: + """A url to point at docs explaining this flavor. + + Returns: + A flavor docs url. + """ + return self.generate_default_docs_url() + + @property + def sdk_docs_url(self) -> Optional[str]: + """A url to point at SDK docs explaining this flavor. + + Returns: + A flavor SDK docs url. + """ + return self.generate_default_sdk_docs_url() + + @property + def logo_url(self) -> str: + """A url to represent the flavor in the dashboard. + + Returns: + The flavor logo. + """ + return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/connectors/hyperai/hyperai.png" + + @property + def config_class(self) -> Type[BaseOrchestratorConfig]: + """Config class for the base orchestrator flavor. + + Returns: + The config class. + """ + return HyperAIOrchestratorConfig + + @property + def implementation_class(self) -> Type["HyperAIOrchestrator"]: + """Implementation class for this flavor. + + Returns: + Implementation class for this flavor. + """ + from zenml.integrations.hyperai.orchestrators import ( + HyperAIOrchestrator, + ) + + return HyperAIOrchestrator diff --git a/src/zenml/integrations/hyperai/orchestrators/__init__.py b/src/zenml/integrations/hyperai/orchestrators/__init__.py new file mode 100644 index 0000000000..26789a5331 --- /dev/null +++ b/src/zenml/integrations/hyperai/orchestrators/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
+"""HyperAI orchestrator.""" +from zenml.integrations.hyperai.orchestrators.hyperai_orchestrator import ( + HyperAIOrchestrator +) + +__all__ = [ + "HyperAIOrchestrator" +] diff --git a/src/zenml/integrations/hyperai/orchestrators/hyperai_orchestrator.py b/src/zenml/integrations/hyperai/orchestrators/hyperai_orchestrator.py new file mode 100644 index 0000000000..68993ce3ec --- /dev/null +++ b/src/zenml/integrations/hyperai/orchestrators/hyperai_orchestrator.py @@ -0,0 +1,418 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Implementation of the ZenML HyperAI orchestrator.""" + +import os +import re +import tempfile +from shlex import quote +from typing import TYPE_CHECKING, Any, Dict, Optional, Type, cast + +import paramiko +import yaml + +from zenml.config.base_settings import BaseSettings +from zenml.entrypoints import StepEntrypointConfiguration +from zenml.enums import StackComponentType +from zenml.integrations.hyperai.flavors.hyperai_orchestrator_flavor import ( + HyperAIOrchestratorConfig, + HyperAIOrchestratorSettings, +) +from zenml.logger import get_logger +from zenml.orchestrators import ( + ContainerizedOrchestrator, +) +from zenml.stack import Stack, StackValidator + +if TYPE_CHECKING: + from zenml.models import PipelineDeploymentResponse + +logger = get_logger(__name__) + +ENV_ZENML_HYPERAI_RUN_ID = "ZENML_HYPERAI_ORCHESTRATOR_RUN_ID" + + +class HyperAIOrchestrator(ContainerizedOrchestrator): + """Orchestrator responsible for running pipelines on HyperAI instances.""" + + @property + def config(self) -> HyperAIOrchestratorConfig: + """Returns the `HyperAIOrchestratorConfig` config. + + Returns: + The configuration. + """ + return cast(HyperAIOrchestratorConfig, self._config) + + @property + def settings_class(self) -> Optional[Type["BaseSettings"]]: + """Settings class for the HyperAI orchestrator. + + Returns: + The settings class. + """ + return HyperAIOrchestratorSettings + + @property + def validator(self) -> Optional[StackValidator]: + """Ensures there is an image builder in the stack. + + Returns: + A `StackValidator` instance. + """ + return StackValidator( + required_components={ + StackComponentType.CONTAINER_REGISTRY, + StackComponentType.IMAGE_BUILDER, + } + ) + + def get_orchestrator_run_id(self) -> str: + """Returns the active orchestrator run id. + + Raises: + RuntimeError: If the environment variable specifying the run id + is not set. + + Returns: + The orchestrator run id. + """ + try: + return os.environ[ENV_ZENML_HYPERAI_RUN_ID] + except KeyError: + raise RuntimeError( + "Unable to read run id from environment variable " + f"{ENV_ZENML_HYPERAI_RUN_ID}." + ) + + def _validate_mount_path(self, path: str) -> str: + """Validates if a given string is in a valid path format. + + Args: + path: The path to be validated. + + Returns: + The path in a valid format. + + Raises: + RuntimeError: If the path is not in a valid format. 
+ """ + # Define a regular expression pattern to match a valid path format + pattern = r'^(?:[a-zA-Z]:\\(\\[^\\/:*?"<>|]*)*$|^/([^\0]*)*$)' + + if bool(re.match(pattern, path)): + return path + else: + raise RuntimeError( + f"Path '{path}' is not in a valid format, so a mount cannot be established." + ) + + def _escape_shell_command(self, command: str) -> str: + """Escapes a shell command. + + Args: + command: The command to escape. + + Returns: + The escaped command. + """ + return quote(command) + + def prepare_or_run_pipeline( + self, + deployment: "PipelineDeploymentResponse", + stack: "Stack", + environment: Dict[str, str], + ) -> Any: + """Sequentially runs all pipeline steps in Docker containers. + + Assumes that: + - A HyperAI (hyperai.ai) instance is running on the configured IP address. + - The HyperAI instance has been configured to allow SSH connections from the + machine running the pipeline. + - Docker and Docker Compose are installed on the HyperAI instance. + - A key pair has been generated and the public key has been added to the + HyperAI instance's `authorized_keys` file. + - The private key is available in a HyperAI service connector linked to this + orchestrator. + + Args: + deployment: The pipeline deployment to prepare or run. + stack: The stack the pipeline will run on. + environment: Environment variables to set in the orchestration + environment. + + Raises: + RuntimeError: If a step fails. + """ + from zenml.integrations.hyperai.service_connectors.hyperai_service_connector import ( + HyperAIServiceConnector, + ) + + # Basic Docker Compose definition + compose_definition: Dict[str, Any] = {"version": "3", "services": {}} + + # Get deployment id + deployment_id = deployment.id + + # Set environment + os.environ[ENV_ZENML_HYPERAI_RUN_ID] = str(deployment_id) + environment[ENV_ZENML_HYPERAI_RUN_ID] = str(deployment_id) + + # Add each step as a service to the Docker Compose definition + logger.info("Preparing pipeline steps for deployment.") + for step_name, step in deployment.step_configurations.items(): + # Get image + image = self.get_image(deployment=deployment, step_name=step_name) + + # Get settings + step_settings = cast( + HyperAIOrchestratorSettings, self.get_settings(step) + ) + + # Define container name as combination between deployment id and step name + container_name = f"{deployment_id}-{step_name}" + + # Make Compose service definition for step + compose_definition["services"][container_name] = { + "image": image, + "container_name": container_name, + "network_mode": "host", + "entrypoint": StepEntrypointConfiguration.get_entrypoint_command(), + "command": StepEntrypointConfiguration.get_entrypoint_arguments( + step_name=step_name, deployment_id=deployment.id + ), + "volumes": [ + "{}:{}".format( + self._validate_mount_path(mount_from), + self._validate_mount_path(mount_to), + ) + for mount_from, mount_to in step_settings.mounts_from_to.items() + ], + } + + # Depending on GPU setting, add GPU support to service definition + if self.config.gpu_enabled_in_container: + compose_definition["services"][container_name]["deploy"] = { + "resources": { + "reservations": { + "devices": [ + {"driver": "nvidia", "capabilities": ["gpu"]} + ] + } + } + } + + # Depending on whether it is a scheduled or a realtime pipeline, add + # potential .env file to service definition for deployment ID override. 
+            if deployment.schedule:
+                # drop ZENML_HYPERAI_ORCHESTRATOR_RUN_ID from environment
+                del environment[ENV_ZENML_HYPERAI_RUN_ID]
+                compose_definition["services"][container_name]["env_file"] = [
+                    ".env"
+                ]
+
+            compose_definition["services"][container_name][
+                "environment"
+            ] = environment
+
+            # Add dependencies on upstream steps if applicable. Update the
+            # `depends_on` mapping in place so that multiple upstream steps
+            # accumulate instead of overwriting each other.
+            upstream_steps = step.spec.upstream_steps
+            for upstream_step_name in upstream_steps:
+                upstream_container_name = (
+                    f"{deployment_id}-{upstream_step_name}"
+                )
+                compose_definition["services"][container_name].setdefault(
+                    "depends_on", {}
+                )[upstream_container_name] = {
+                    "condition": "service_completed_successfully"
+                }
+
+        # Convert into yaml
+        logger.info("Finalizing Docker Compose definition.")
+        compose_definition_yaml: str = yaml.dump(compose_definition)
+
+        # Connect to configured HyperAI instance
+        logger.info(
+            "Connecting to HyperAI instance and placing Docker Compose file."
+        )
+        paramiko_client: paramiko.SSHClient
+        if connector := self.get_connector():
+            paramiko_client = connector.connect()
+            if paramiko_client is None:
+                raise RuntimeError(
+                    "Expected to receive a `paramiko.SSHClient` object from the "
+                    "linked connector, but got `None`. This likely originates from "
+                    "a misconfigured service connector, typically caused by a wrong "
+                    "SSH key type being selected. Please check your "
+                    "`hyperai_orchestrator` configuration and make sure that the "
+                    "`ssh_key_type` of its connected service connector is set to the "
+                    "correct value."
+                )
+            elif not isinstance(paramiko_client, paramiko.SSHClient):
+                raise RuntimeError(
+                    f"Expected to receive a `paramiko.SSHClient` object from the "
+                    f"linked connector, but got type `{type(paramiko_client)}`."
+                )
+        else:
+            raise RuntimeError(
+                "You must link a HyperAI service connector to the orchestrator."
+            )
+
+        # Get container registry autologin setting
+        if self.config.container_registry_autologin:
+            logger.info(
+                "Attempting to automatically log in to container registry used by stack."
+            )
+
+            # Select stack container registry
+            container_registry = stack.container_registry
+
+            # Raise error if no container registry is found
+            if not container_registry:
+                raise RuntimeError(
+                    "Unable to find container registry in stack."
+                )
+
+            # Get container registry credentials from its config
+            credentials = container_registry.credentials
+            if credentials is None:
+                raise RuntimeError(
+                    "The container registry in the active stack has no "
+                    "credentials or service connector configured, but the "
+                    "HyperAI orchestrator is set to autologin to the container "
+                    "registry. Please configure the container registry with "
+                    "credentials or turn off the `container_registry_autologin` "
+                    "setting in the HyperAI orchestrator configuration."
+ ) + + container_registry_url = container_registry.config.uri + ( + container_registry_username, + container_registry_password, + ) = credentials + + # Escape inputs + container_registry_username = self._escape_shell_command( + container_registry_username + ) + container_registry_url = self._escape_shell_command( + container_registry_url + ) + + # Log in to container registry using --password-stdin + stdin, stdout, stderr = paramiko_client.exec_command( # nosec + f"docker login -u {container_registry_username} " + f"--password-stdin {container_registry_url}" + ) + # Send the password to stdin + stdin.channel.send( + f"{container_registry_password}\n".encode("utf-8") + ) + stdin.channel.shutdown_write() + + # Log stdout + for line in stdout.readlines(): + logger.info(line) + + # Get username from connector + assert isinstance(connector, HyperAIServiceConnector) + username = connector.config.username + + # Set up pipeline-runs directory if it doesn't exist + nonscheduled_directory_name = self._escape_shell_command( + f"/home/{username}/pipeline-runs" + ) + directory_name = ( + nonscheduled_directory_name + if not deployment.schedule + else self._escape_shell_command( + f"/home/{username}/scheduled-pipeline-runs" + ) + ) + stdin, stdout, stderr = paramiko_client.exec_command( # nosec + f"mkdir -p {directory_name}" + ) + + # Get pipeline run id and create directory for it + orchestrator_run_id = self.get_orchestrator_run_id() + directory_name = self._escape_shell_command( + f"{directory_name}/{orchestrator_run_id}" + ) + stdin, stdout, stderr = paramiko_client.exec_command( # nosec + f"mkdir -p {directory_name}" + ) + + # Remove all folders from nonscheduled pipelines if they are 7 days old or older + if self.config.automatic_cleanup_pipeline_files: + logger.info( + "Cleaning up old pipeline files on HyperAI instance. This may take a while." + ) + stdin, stdout, stderr = paramiko_client.exec_command( # nosec + f"find {nonscheduled_directory_name} -type d -ctime +7 -exec rm -rf {{}} +" + ) + + # Create temporary file and write Docker Compose file to it + with tempfile.NamedTemporaryFile(mode="w", delete=True) as f: + # Write Docker Compose file to temporary file + with f.file as f_: + f_.write(compose_definition_yaml) + + # Scp Docker Compose file to HyperAI instance + try: + scp_client = paramiko_client.open_sftp() + scp_client.put(f.name, f"{directory_name}/docker-compose.yaml") + scp_client.close() + except FileNotFoundError: + raise RuntimeError( + "Failed to write Docker Compose file to HyperAI instance. Does the user have permissions to write?" + ) + + # Run or schedule Docker Compose file depending on settings + if not deployment.schedule: + logger.info( + "Starting ZenML pipeline on HyperAI instance. Depending on the size of your container image, this may take a while..." + ) + stdin, stdout, stderr = paramiko_client.exec_command( # nosec + f"cd {directory_name} && docker compose up -d" + ) + + # Log errors in case of failure + for line in stderr.readlines(): + logger.info(line) + else: + # Get cron expression for scheduled pipeline + cron_expression = deployment.schedule.cron_expression + if not cron_expression: + raise RuntimeError( + "A cron expression is required for scheduled pipelines." 
+ ) + expected_cron_pattern = r"^(?:(?:[0-9]|[1-5][0-9]|60)(?:,(?:[0-9]|[1-5][0-9]|60))*|[*](?:\/[1-9][0-9]*)?)(?:[ \t]+(?:(?:[0-9]|[0-5][0-9]|60)(?:,(?:[0-9]|[0-5][0-9]|60))*|[*](?:\/[1-9][0-9]*)?)){4}$" + if not re.match(expected_cron_pattern, cron_expression): + raise RuntimeError( + f"The cron expression '{cron_expression}' is not in a valid format." + ) + + # Log about scheduling + logger.info("Scheduling ZenML pipeline on HyperAI instance.") + logger.info(f"Cron expression: {cron_expression}") + + # Create cron job for scheduled pipeline on HyperAI instance + stdin, stdout, stderr = paramiko_client.exec_command( # nosec + f"(crontab -l ; echo '{cron_expression} cd {directory_name} && echo {ENV_ZENML_HYPERAI_RUN_ID}=\"{deployment_id}_$(date +\%s)\" > .env && docker compose up -d') | crontab -" + ) + + logger.info("Pipeline scheduled successfully.") diff --git a/src/zenml/integrations/hyperai/service_connectors/__init__.py b/src/zenml/integrations/hyperai/service_connectors/__init__.py new file mode 100644 index 0000000000..1b641cb297 --- /dev/null +++ b/src/zenml/integrations/hyperai/service_connectors/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""HyperAI Service Connector.""" + +from zenml.integrations.hyperai.service_connectors.hyperai_service_connector import ( + HyperAIServiceConnector, +) + +__all__ = ["HyperAIServiceConnector"] diff --git a/src/zenml/integrations/hyperai/service_connectors/hyperai_service_connector.py b/src/zenml/integrations/hyperai/service_connectors/hyperai_service_connector.py new file mode 100644 index 0000000000..12680850b7 --- /dev/null +++ b/src/zenml/integrations/hyperai/service_connectors/hyperai_service_connector.py @@ -0,0 +1,373 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""HyperAI Service Connector. + +The HyperAI Service Connector allows authenticating to HyperAI (hyperai.ai) +GPU equipped instances. 
+""" +import base64 +import io +from typing import Any, List, Optional, Type + +import paramiko +from pydantic import Field, SecretStr + +from zenml.exceptions import AuthorizationException +from zenml.integrations.hyperai import ( + HYPERAI_CONNECTOR_TYPE, + HYPERAI_RESOURCE_TYPE, +) +from zenml.logger import get_logger +from zenml.models import ( + AuthenticationMethodModel, + ResourceTypeModel, + ServiceConnectorTypeModel, +) +from zenml.service_connectors.service_connector import ( + AuthenticationConfig, + ServiceConnector, +) +from zenml.utils.enum_utils import StrEnum + +logger = get_logger(__name__) + + +class HyperAICredentials(AuthenticationConfig): + """HyperAI client authentication credentials.""" + + base64_ssh_key: SecretStr = Field( + title="SSH key (base64)", + ) + ssh_passphrase: Optional[SecretStr] = Field( + default=None, + title="SSH key passphrase", + ) + + +class HyperAIConfiguration(HyperAICredentials): + """HyperAI client configuration.""" + + hostnames: List[str] = Field( + title="Hostnames of the supported HyperAI instances.", + ) + + username: str = Field( + title="Username to use to connect to the HyperAI instance.", + ) + + +class HyperAIAuthenticationMethods(StrEnum): + """HyperAI Authentication methods.""" + + RSA_KEY_OPTIONAL_PASSPHRASE = "rsa-key" + DSA_KEY_OPTIONAL_PASSPHRASE = "dsa-key" + ECDSA_KEY_OPTIONAL_PASSPHRASE = "ecdsa-key" + ED25519_KEY_OPTIONAL_PASSPHRASE = "ed25519-key" + + +HYPERAI_SERVICE_CONNECTOR_TYPE_SPEC = ServiceConnectorTypeModel( + name="HyperAI Service Connector", + connector_type=HYPERAI_CONNECTOR_TYPE, + description=""" +The ZenML HyperAI Service Connector allows authenticating to HyperAI (hyperai.ai) +GPU equipped instances. + +This connector provides an SSH connection to your HyperAI instance, which can be +used to run ZenML pipelines. + +The instance must be configured to allow SSH connections from the ZenML server. +Docker and Docker Compose must be installed on the HyperAI instance. If you want +to use scheduled pipeline runs, also ensure that a working cron daemon is installed +and running on the HyperAI instance. +""", + logo_url="https://public-flavor-logos.s3.eu-central-1.amazonaws.com/connectors/hyperai/hyperai.png", + emoji=":robot_face:", + auth_methods=[ + AuthenticationMethodModel( + name="RSA key with optional passphrase", + auth_method=HyperAIAuthenticationMethods.RSA_KEY_OPTIONAL_PASSPHRASE, + description=""" +Use an RSA private key to authenticate with a HyperAI instance. The key may be +encrypted with a passphrase. If the key is encrypted, the passphrase must be +provided. Make sure to provide the key as a Base64 encoded string. +""", + config_class=HyperAIConfiguration, + ), + AuthenticationMethodModel( + name="DSA/DSS key with optional passphrase", + auth_method=HyperAIAuthenticationMethods.DSA_KEY_OPTIONAL_PASSPHRASE, + description=""" +Use a DSA/DSS private key to authenticate with a HyperAI instance. The key may be +encrypted with a passphrase. If the key is encrypted, the passphrase must be +provided. Make sure to provide the key as a Base64 encoded string. +""", + config_class=HyperAIConfiguration, + ), + AuthenticationMethodModel( + name="ECDSA key with optional passphrase", + auth_method=HyperAIAuthenticationMethods.ECDSA_KEY_OPTIONAL_PASSPHRASE, + description=""" +Use an ECDSA private key to authenticate with a HyperAI instance. The key may be +encrypted with a passphrase. If the key is encrypted, the passphrase must be +provided. Make sure to provide the key as a Base64 encoded string. 
+""", + config_class=HyperAIConfiguration, + ), + AuthenticationMethodModel( + name="Ed25519 key with optional passphrase", + auth_method=HyperAIAuthenticationMethods.ED25519_KEY_OPTIONAL_PASSPHRASE, + description=""" +Use an Ed25519 private key to authenticate with a HyperAI instance. The key may be +encrypted with a passphrase. If the key is encrypted, the passphrase must be +provided. Make sure to provide the key as a Base64 encoded string. +""", + config_class=HyperAIConfiguration, + ), + ], + resource_types=[ + ResourceTypeModel( + name="HyperAI instance", + resource_type=HYPERAI_RESOURCE_TYPE, + description=""" +Allows users to access a HyperAI instance as a resource. When used by +connector consumers, they are provided a pre-authenticated SSH client +instance. +""", + auth_methods=HyperAIAuthenticationMethods.values(), + supports_instances=True, + logo_url="https://public-flavor-logos.s3.eu-central-1.amazonaws.com/connectors/hyperai/hyperai.png", + emoji=":robot_face:", + ), + ], +) + + +class HyperAIServiceConnector(ServiceConnector): + """HyperAI service connector.""" + + config: HyperAIConfiguration + + @classmethod + def _get_connector_type(cls) -> ServiceConnectorTypeModel: + """Get the service connector specification. + + Returns: + The service connector specification. + """ + return HYPERAI_SERVICE_CONNECTOR_TYPE_SPEC + + def _paramiko_key_type_given_auth_method(self) -> Type[paramiko.PKey]: + """Get the Paramiko key type given the authentication method. + + Returns: + The Paramiko key type. + + Raises: + ValueError: If the authentication method is invalid. + """ + mapping = { + HyperAIAuthenticationMethods.RSA_KEY_OPTIONAL_PASSPHRASE: paramiko.RSAKey, + HyperAIAuthenticationMethods.DSA_KEY_OPTIONAL_PASSPHRASE: paramiko.DSSKey, + HyperAIAuthenticationMethods.ECDSA_KEY_OPTIONAL_PASSPHRASE: paramiko.ECDSAKey, + HyperAIAuthenticationMethods.ED25519_KEY_OPTIONAL_PASSPHRASE: paramiko.Ed25519Key, + } + + try: + return mapping[HyperAIAuthenticationMethods(self.auth_method)] + except KeyError: + raise ValueError( + f"Invalid authentication method: {self.auth_method}" + ) + + def _create_paramiko_client( + self, hostname: str + ) -> paramiko.client.SSHClient: + """Create a Paramiko SSH client based on the configuration. + + Args: + hostname: The hostname of the HyperAI instance. + + Returns: + A Paramiko SSH client. + + Raises: + AuthorizationException: If the client cannot be created. 
+ """ + if self.config.ssh_passphrase is None: + ssh_passphrase = None + else: + ssh_passphrase = self.config.ssh_passphrase.get_secret_value() + + # Connect to the HyperAI instance + try: + # Convert the SSH key from base64 to string + base64_key_value = self.config.base64_ssh_key.get_secret_value() + ssh_key = base64.b64decode(base64_key_value).decode("utf-8") + paramiko_key = None + + with io.StringIO(ssh_key) as f: + paramiko_key = self._paramiko_key_type_given_auth_method().from_private_key( + f, password=ssh_passphrase + ) + + # Trim whitespace from the IP address + hostname = hostname.strip() + + paramiko_client = paramiko.client.SSHClient() + paramiko_client.set_missing_host_key_policy( + paramiko.AutoAddPolicy() # nosec + ) + paramiko_client.connect( + hostname=hostname, + username=self.config.username, + pkey=paramiko_key, + timeout=30, + ) + + return paramiko_client + + except paramiko.ssh_exception.BadHostKeyException as e: + logger.error("Bad host key: %s", e) + except paramiko.ssh_exception.AuthenticationException as e: + logger.error("Authentication failed: %s", e) + except paramiko.ssh_exception.SSHException as e: + logger.error( + "SSH error: %s. A common cause for this error is selection of the wrong key type in your service connector.", + e, + ) + except Exception as e: + logger.error( + "Unknown error while connecting to HyperAI instance: %s. Please check your network connection, IP address, and authentication details.", + e, + ) + + raise AuthorizationException( + "Could not create SSH client for HyperAI instance." + ) + + def _authorize_client(self, hostname: str) -> None: + """Verify that the client can authenticate with the HyperAI instance. + + Args: + hostname: The hostname of the HyperAI instance. + """ + logger.info("Verifying connection to HyperAI instance...") + + paramiko_client = self._create_paramiko_client(hostname) + paramiko_client.close() + + def _connect_to_resource( + self, + **kwargs: Any, + ) -> Any: + """Connect to a HyperAI instance. Returns an authenticated SSH client. + + Args: + kwargs: Additional implementation specific keyword arguments to pass + to the session or client constructor. + + Returns: + An authenticated Paramiko SSH client. + """ + logger.info("Connecting to HyperAI instance...") + assert self.resource_id is not None + + paramiko_client = self._create_paramiko_client(self.resource_id) + return paramiko_client + + def _configure_local_client( + self, + **kwargs: Any, + ) -> None: + """There is no local client for the HyperAI connector, so it does nothing. + + Args: + kwargs: Additional implementation specific keyword arguments to pass + to the session or client constructor. + + Raises: + NotImplementedError: If there is no local client for the HyperAI + connector. + """ + raise NotImplementedError( + "There is no local client for the HyperAI connector." + ) + + @classmethod + def _auto_configure( + cls, + auth_method: Optional[str] = None, + resource_type: Optional[str] = None, + resource_id: Optional[str] = None, + **kwargs: Any, + ) -> "HyperAIServiceConnector": + """Auto-configure the connector. + + Not supported by the HyperAI connector. + + Args: + auth_method: The particular authentication method to use. If not + specified, the connector implementation must decide which + authentication method to use or raise an exception. + resource_type: The type of resource to configure. + resource_id: The ID of the resource to configure. 
The
+                implementation may choose to either require or ignore this
+                parameter if it does not support or detect a resource type that
+                supports multiple instances.
+            kwargs: Additional implementation specific keyword arguments to use.
+
+        Raises:
+            NotImplementedError: If the connector auto-configuration fails or
+                is not supported.
+        """
+        raise NotImplementedError(
+            "Auto-configuration is not supported by the HyperAI connector."
+        )
+
+    def _verify(
+        self,
+        resource_type: Optional[str] = None,
+        resource_id: Optional[str] = None,
+    ) -> List[str]:
+        """Verify that a connection can be established to the HyperAI instance.
+
+        Args:
+            resource_type: The type of resource to verify. Must be set to the
+                HyperAI resource type.
+            resource_id: The HyperAI instance to verify.
+
+        Returns:
+            The list of hostnames for which a connection could be established.
+
+        Raises:
+            ValueError: If the resource ID is not in the list of configured
+                hostnames.
+        """
+        if resource_id:
+            if resource_id not in self.config.hostnames:
+                raise ValueError(
+                    f"The supplied hostname '{resource_id}' is not in the list "
+                    f"of configured hostnames: {self.config.hostnames}. Please "
+                    f"check your configuration."
+                )
+            hostnames = [resource_id]
+        else:
+            hostnames = self.config.hostnames
+
+        resources = []
+        for hostname in hostnames:
+            self._authorize_client(hostname)
+            resources.append(hostname)
+
+        return resources
diff --git a/src/zenml/models/v2/core/service_connector.py b/src/zenml/models/v2/core/service_connector.py
index 08e189d2c9..cbc6938d61 100644
--- a/src/zenml/models/v2/core/service_connector.py
+++ b/src/zenml/models/v2/core/service_connector.py
@@ -829,6 +829,7 @@ def _validate_and_configure_resources(
                 "required", []
             )
             secret = attr_schema.get("format", "") == "password"
+            attr_type = attr_schema.get("type", "string")
             value = configuration.get(attr_name, secrets.get(attr_name))
             if required:
                 if value is None:
@@ -846,6 +847,11 @@
                 else:
                     update_connector_metadata.secrets[attr_name] = SecretStr(value)
             else:
+                if attr_type == "array" and isinstance(value, str):
+                    try:
+                        value = json.loads(value)
+                    except json.decoder.JSONDecodeError:
+                        value = value.split(",")
                 update_connector_metadata.configuration[attr_name] = value
 
         # Warn about attributes that are not part of the configuration schema
diff --git a/src/zenml/service_connectors/service_connector_registry.py b/src/zenml/service_connectors/service_connector_registry.py
index fdb580b024..dd25bc969e 100644
--- a/src/zenml/service_connectors/service_connector_registry.py
+++ b/src/zenml/service_connectors/service_connector_registry.py
@@ -245,5 +245,12 @@ def register_builtin_service_connectors(self) -> None:
         except ImportError as e:
             logger.warning(f"Could not import Docker service connector: {e}.")
 
+        try:
+            from zenml.integrations.hyperai.service_connectors.hyperai_service_connector import (  # noqa
+                HyperAIServiceConnector,
+            )
+        except ImportError as e:
+            logger.warning(f"Could not import HyperAI service connector: {e}.")
+
 
 service_connector_registry = ServiceConnectorRegistry()
diff --git a/tests/integration/integrations/hyperai/__init__.py b/tests/integration/integrations/hyperai/__init__.py
new file mode 100644
index 0000000000..cd90a82cfc
--- /dev/null
+++ b/tests/integration/integrations/hyperai/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
diff --git a/tests/integration/integrations/hyperai/orchestrators/__init__.py b/tests/integration/integrations/hyperai/orchestrators/__init__.py
new file mode 100644
index 0000000000..cd90a82cfc
--- /dev/null
+++ b/tests/integration/integrations/hyperai/orchestrators/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
diff --git a/tests/integration/integrations/hyperai/orchestrators/test_hyperai_orchestrator.py b/tests/integration/integrations/hyperai/orchestrators/test_hyperai_orchestrator.py
new file mode 100644
index 0000000000..b0a7fa88e1
--- /dev/null
+++ b/tests/integration/integrations/hyperai/orchestrators/test_hyperai_orchestrator.py
@@ -0,0 +1,87 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#       https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
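The tests below exercise `_validate_mount_path`, whose implementation lands earlier in this patch and is not reproduced in this excerpt. For orientation, here is a minimal, hypothetical sketch of such a validator; the function name and regular expressions are illustrative assumptions, not the code this patch actually adds:

```python
import re


def validate_mount_path(path: str) -> str:
    """Return `path` unchanged if it looks like a plain POSIX or Windows path.

    Hypothetical sketch only: the validator this patch adds may use
    different rules. Directory traversal and anything that does not look
    like a bare path (e.g. shell metacharacters) is rejected.
    """
    if ".." in path:
        raise RuntimeError(f"Invalid mount path: {path}")
    is_posix = re.fullmatch(r"(?:/[\w.-]+)+/?", path)
    # The doubled backslashes mirror the escaped form used in the tests.
    is_windows = re.fullmatch(r"[A-Za-z]:\\\\(?:[\w.-]+\\\\)*[\w.-]+", path)
    if is_posix or is_windows:
        return path
    raise RuntimeError(f"Invalid mount path: {path}")
```

Under a validator of this shape, the two valid sample paths in the tests are returned unchanged and both invalid samples raise `RuntimeError`, which is exactly what the tests assert.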
+from datetime import datetime
+from uuid import uuid4
+
+import pytest
+
+from zenml.enums import StackComponentType
+from zenml.integrations.hyperai.flavors.hyperai_orchestrator_flavor import (
+    HyperAIOrchestratorConfig,
+)
+from zenml.integrations.hyperai.orchestrators.hyperai_orchestrator import (
+    HyperAIOrchestrator,
+)
+
+
+def test_hyperai_orchestrator_attributes():
+    """Tests that the basic attributes of the HyperAI orchestrator are set correctly."""
+    orchestrator = HyperAIOrchestrator(
+        name="",
+        id=uuid4(),
+        config=HyperAIOrchestratorConfig(),
+        flavor="hyperai",
+        type=StackComponentType.ORCHESTRATOR,
+        user=uuid4(),
+        workspace=uuid4(),
+        created=datetime.now(),
+        updated=datetime.now(),
+    )
+
+    assert orchestrator.type == StackComponentType.ORCHESTRATOR
+    assert orchestrator.flavor == "hyperai"
+    assert orchestrator.config.is_remote is True
+    assert orchestrator.config.container_registry_autologin is False
+    assert orchestrator.config.automatic_cleanup_pipeline_files is True
+    assert orchestrator.config.gpu_enabled_in_container is True
+
+
+def test_validate_mount_path():
+    """Tests that only valid mount paths are accepted by the HyperAI orchestrator."""
+    orchestrator = HyperAIOrchestrator(
+        name="",
+        id=uuid4(),
+        config=HyperAIOrchestratorConfig(),
+        flavor="hyperai",
+        type=StackComponentType.ORCHESTRATOR,
+        user=uuid4(),
+        workspace=uuid4(),
+        created=datetime.now(),
+        updated=datetime.now(),
+    )
+
+    # Valid POSIX path
+    valid_posix_path = "/mnt/hello/there"
+    assert (
+        orchestrator._validate_mount_path(valid_posix_path) == valid_posix_path
+    )
+
+    # Valid Windows path
+    valid_windows_path = r"C:\\Users\\user\\Documents"
+    assert (
+        orchestrator._validate_mount_path(valid_windows_path)
+        == valid_windows_path
+    )
+
+    # Invalid POSIX path
+    invalid_posix_path = "echo '>something>' ; /mnt/hello/there/.."
+    with pytest.raises(RuntimeError):
+        orchestrator._validate_mount_path(invalid_posix_path)
+
+    # Invalid Windows path
+    invalid_windows_path = (
+        "set SOMETHING=123; C:\\Users\\user\\Documents\\..\\file.txt"
+    )
+    with pytest.raises(RuntimeError):
+        orchestrator._validate_mount_path(invalid_windows_path)

From a0a12e8440d0b1c1b925a40f67a9a51f44eee39b Mon Sep 17 00:00:00 2001
From: Christian Versloot
Date: Tue, 6 Feb 2024 13:28:20 +0100
Subject: [PATCH 20/27] Let contributors shield link to ZenML contributors
 (#2400)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index efca4d883a..380a3d7506 100644
--- a/README.md
+++ b/README.md
@@ -45,7 +45,7 @@
 
 [contributors-shield]: https://img.shields.io/github/contributors/zenml-io/zenml?color=7A3EF4
 
-[contributors-url]: https://github.com/othneildrew/Best-README-Template/graphs/contributors
+[contributors-url]: https://github.com/zenml-io/zenml/graphs/contributors
 
 [license-shield]: https://img.shields.io/github/license/zenml-io/zenml?color=9565F6

From 479b29255f34000f959fc8679dc2d763c13c19f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bar=C4=B1=C5=9F=20Can=20Durak?= <36421093+bcdurak@users.noreply.github.com>
Date: Tue, 6 Feb 2024 16:03:31 +0100
Subject: [PATCH 21/27] Prepare release 0.55.2 (#2401)

* Prepare release 0.55.2

* Auto-update of Starter template

---------

Co-authored-by: GitHub Actions
---
 README.md                                     |  2 +-
 RELEASE_NOTES.md                              | 37 +++++++++++++++++++
 examples/quickstart/quickstart.ipynb          | 10 ++---
 pyproject.toml                                |  2 +-
 src/zenml/VERSION                             |  2 +-
 src/zenml/zen_server/deploy/helm/Chart.yaml   |  2 +-
 src/zenml/zen_server/deploy/helm/README.md    |  4 +-
 .../migrations/versions/0.55.2_release.py     | 24 ++++++++++++
 8 files changed, 72 insertions(+), 11 deletions(-)
 create mode 100644 src/zenml/zen_stores/migrations/versions/0.55.2_release.py

diff --git a/README.md b/README.md
index 380a3d7506..9da61089ce 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,7 @@ Meet the Team

- ๐ŸŽ‰ Version 0.55.1 is out. Check out the release notes + ๐ŸŽ‰ Version 0.55.2 is out. Check out the release notes here.

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index ea25dd0b5c..52218bd6ed 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,42 @@ +# 0.55.2 + +This patch comes with a variety of new features, bug-fixes, and documentation updates. + +Some of the most important changes include: + +- The ability to add tags to outputs through the step context +- Allowing the secret stores to utilize the implicit authentication method of AWS/GCP/Azure Service Connectors +- [Lazy loading client methods](https://docs.zenml.io/v/docs/user-guide/advanced-guide/data-management/late-materialization) in a pipeline context +- Updates on the Vertex orchestrator to switch to the native VertexAI scheduler +- The new [HyperAI](https://hyperai.ai) integration featuring a new orchestrator and service connector +- Bumping the mlflow version to 2.10.0 + +We'd like to give a special thanks to @christianversloot and @francoisserra for their contributions. + +## What's Changed +* `0.55.1` in migration testing by @avishniakov in https://github.com/zenml-io/zenml/pull/2368 +* Credential-less AWS/GCP/Azure Secrets Store support by @stefannica in https://github.com/zenml-io/zenml/pull/2365 +* Small docs updates by @strickvl in https://github.com/zenml-io/zenml/pull/2359 +* generic `Client()` getters lazy loading by @avishniakov in https://github.com/zenml-io/zenml/pull/2323 +* Added slack settings OSSK-382 by @htahir1 in https://github.com/zenml-io/zenml/pull/2378 +* Label triggered slow ci by @avishniakov in https://github.com/zenml-io/zenml/pull/2379 +* Remove unused `is-slow-ci` input from fast and slow integration testing by @strickvl in https://github.com/zenml-io/zenml/pull/2382 +* Add deprecation warning for `ExternalArtifact` non-value features by @avishniakov in https://github.com/zenml-io/zenml/pull/2375 +* Add telemetry pipeline run ends by @htahir1 in https://github.com/zenml-io/zenml/pull/2377 +* Updating the `update_model` decorator by @bcdurak in https://github.com/zenml-io/zenml/pull/2136 +* Mocked API docs building by @avishniakov in https://github.com/zenml-io/zenml/pull/2360 +* Add outputs tags function by @avishniakov in https://github.com/zenml-io/zenml/pull/2383 +* Bump mlflow to v2.10.0 by @christianversloot in https://github.com/zenml-io/zenml/pull/2374 +* Fix sharing of model versions by @schustmi in https://github.com/zenml-io/zenml/pull/2380 +* Fix GCP service connector login to overwrite existing valid credentials by @stefannica in https://github.com/zenml-io/zenml/pull/2392 +* Update `has_custom_name` for legacy artifacts by @avishniakov in https://github.com/zenml-io/zenml/pull/2384 +* Use native VertexAI scheduler capability instead of old GCP official workaround by @francoisserra in https://github.com/zenml-io/zenml/pull/2310 +* HyperAI integration: orchestrator and service connector by @christianversloot in https://github.com/zenml-io/zenml/pull/2372 + +**Full Changelog**: https://github.com/zenml-io/zenml/compare/0.55.1...0.55.2 + # 0.55.1 **If you are actively using the Model Control Plane features, we suggest that you directly upgrade to 0.55.1, bypassing 0.55.0.** diff --git a/examples/quickstart/quickstart.ipynb b/examples/quickstart/quickstart.ipynb index 03ef6b283f..ee14d95ef7 100644 --- a/examples/quickstart/quickstart.ipynb +++ b/examples/quickstart/quickstart.ipynb @@ -628,8 +628,8 @@ " dataset_trn, dataset_tst = feature_engineering()\n", " else:\n", " # Load the datasets from an older pipeline\n", - " dataset_trn = client.get_artifact_version(id=train_dataset_id)\n", - " 
dataset_tst = client.get_artifact_version(id=test_dataset_id) \n", + " dataset_trn = client.get_artifact_version(name_id_or_prefix=train_dataset_id)\n", + " dataset_tst = client.get_artifact_version(name_id_or_prefix=test_dataset_id) \n", "\n", " trained_model = model_trainer(\n", " dataset_trn=dataset_trn,\n", @@ -970,8 +970,8 @@ "@pipeline\n", "def inference(preprocess_pipeline_id: UUID):\n", " \"\"\"Model batch inference pipeline\"\"\"\n", - " # random_state = client.get_artifact_version(id=preprocess_pipeline_id).metadata[\"random_state\"].value\n", - " # target = client.get_artifact_version(id=preprocess_pipeline_id).run_metadata['target'].value\n", + " # random_state = client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id).metadata[\"random_state\"].value\n", + " # target = client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id).run_metadata['target'].value\n", " random_state = 42\n", " target = \"target\"\n", "\n", @@ -981,7 +981,7 @@ " df_inference = inference_preprocessor(\n", " dataset_inf=df_inference,\n", " # We use the preprocess pipeline from the feature engineering pipeline\n", - " preprocess_pipeline=client.get_artifact_version(id=preprocess_pipeline_id),\n", + " preprocess_pipeline=client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id),\n", " target=target,\n", " )\n", " inference_predict(\n", diff --git a/pyproject.toml b/pyproject.toml index 0b7d39f1f4..a784baf272 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "zenml" -version = "0.55.1" +version = "0.55.2" packages = [{ include = "zenml", from = "src" }] description = "ZenML: Write production-ready ML code." authors = ["ZenML GmbH "] diff --git a/src/zenml/VERSION b/src/zenml/VERSION index b0b5281b5f..df174c967e 100644 --- a/src/zenml/VERSION +++ b/src/zenml/VERSION @@ -1 +1 @@ -0.55.1 +0.55.2 diff --git a/src/zenml/zen_server/deploy/helm/Chart.yaml b/src/zenml/zen_server/deploy/helm/Chart.yaml index 430cdedcab..859ae4e1bd 100644 --- a/src/zenml/zen_server/deploy/helm/Chart.yaml +++ b/src/zenml/zen_server/deploy/helm/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: zenml -version: "0.55.1" +version: "0.55.2" description: Open source MLOps framework for portable production ready ML pipelines keywords: - mlops diff --git a/src/zenml/zen_server/deploy/helm/README.md b/src/zenml/zen_server/deploy/helm/README.md index 804b020c4e..9572d85cf2 100644 --- a/src/zenml/zen_server/deploy/helm/README.md +++ b/src/zenml/zen_server/deploy/helm/README.md @@ -20,8 +20,8 @@ ZenML is an open-source MLOps framework designed to help you create robust, main To install the ZenML chart directly from Amazon ECR, use the following command: ```bash -# example command for version 0.55.1 -helm install my-zenml oci://public.ecr.aws/zenml/zenml --version 0.55.1 +# example command for version 0.55.2 +helm install my-zenml oci://public.ecr.aws/zenml/zenml --version 0.55.2 ``` Note: Ensure you have OCI support enabled in your Helm client and that you are authenticated with Amazon ECR. diff --git a/src/zenml/zen_stores/migrations/versions/0.55.2_release.py b/src/zenml/zen_stores/migrations/versions/0.55.2_release.py new file mode 100644 index 0000000000..adfe013674 --- /dev/null +++ b/src/zenml/zen_stores/migrations/versions/0.55.2_release.py @@ -0,0 +1,24 @@ +"""Release [0.55.2]. + +Revision ID: 0.55.2 +Revises: 0.55.1 +Create Date: 2024-02-06 11:32:02.715174 + +""" + + +# revision identifiers, used by Alembic. 
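Release revisions like the one below are deliberate no-ops: they only advance Alembic's version stamp so the database records the new ZenML version. For contrast, a hypothetical schema-changing revision would put real work into `upgrade()` and `downgrade()`; the table and column names here are invented for illustration:

```python
import sqlalchemy as sa
from alembic import op


def upgrade() -> None:
    # Hypothetical example, not part of this release: add a nullable column.
    op.add_column("pipeline_run", sa.Column("notes", sa.Text(), nullable=True))


def downgrade() -> None:
    # Revert the hypothetical column so downgrades stay symmetric.
    op.drop_column("pipeline_run", "notes")
```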
+revision = "0.55.2" +down_revision = "0.55.1" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + """Upgrade database schema and/or data, creating a new revision.""" + pass + + +def downgrade() -> None: + """Downgrade database schema and/or data back to the previous revision.""" + pass From e51297c54a81af174139d680d45b981e99a420b6 Mon Sep 17 00:00:00 2001 From: Alex Strick van Linschoten Date: Tue, 6 Feb 2024 16:22:26 +0100 Subject: [PATCH 22/27] update TOC (#2406) --- docs/book/toc.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/book/toc.md b/docs/book/toc.md index 4d2df6de5b..b560ade1ef 100644 --- a/docs/book/toc.md +++ b/docs/book/toc.md @@ -94,6 +94,7 @@ * [Tekton Orchestrator](stacks-and-components/component-guide/orchestrators/tekton.md) * [Airflow Orchestrator](stacks-and-components/component-guide/orchestrators/airflow.md) * [Skypilot VM Orchestrator](stacks-and-components/component-guide/orchestrators/skypilot-vm.md) + * [HyperAI Orchestrator](stacks-and-components/component-guide/orchestrators/hyperai.md) * [Develop a custom orchestrator](stacks-and-components/component-guide/orchestrators/custom.md) * [Artifact Stores](stacks-and-components/component-guide/artifact-stores/artifact-stores.md) * [Local Artifact Store](stacks-and-components/component-guide/artifact-stores/local.md) From 4c284feb56cb47dffcdc6ff10f49d272737823e3 Mon Sep 17 00:00:00 2001 From: Alex Strick van Linschoten Date: Tue, 20 Feb 2024 09:22:44 +0100 Subject: [PATCH 23/27] ubuntu-dind-runners for release workflow --- .github/workflows/release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fe21a69b22..dde66ae4ee 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,12 +9,12 @@ jobs: setup-and-test: uses: ./.github/workflows/unit-test.yml with: - os: ubuntu-latest + os: ubuntu-dind-runners python-version: '3.8' secrets: inherit mlstacks-compatibility-check: needs: setup-and-test - runs-on: ubuntu-latest + runs-on: ubuntu-dind-runners steps: - name: Checkout code uses: actions/checkout@v4.1.1 From c34b38936c839c15ac1e432ec0286800cc920b5c Mon Sep 17 00:00:00 2001 From: kabinja Date: Tue, 20 Feb 2024 21:50:41 +0100 Subject: [PATCH 24/27] fix bug comparing id in exit method --- src/zenml/stack/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zenml/stack/utils.py b/src/zenml/stack/utils.py index 6a85dab807..a8c37741f2 100644 --- a/src/zenml/stack/utils.py +++ b/src/zenml/stack/utils.py @@ -168,5 +168,5 @@ def __exit__( e.g., divide_by_zero error. None if no exception. traceback: Traceback report. None if no exception. 
""" - if self._default_stack.id != Client().active_stack: + if self._default_stack.id != Client().active_stack.id: Client().activate_stack(self._default_stack.id) From 876a9fbcd6f71f07be44aa2df86441bc8b53a813 Mon Sep 17 00:00:00 2001 From: kabinja Date: Mon, 26 Feb 2024 20:33:21 +0100 Subject: [PATCH 25/27] rename active_stack to stack in pipeline run configuration --- .../pipelining-features/configure-steps-pipelines.md | 2 +- src/zenml/config/pipeline_run_configuration.py | 2 +- tests/integration/functional/cli/test_pipeline.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md b/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md index c8d42c199b..375b599cc2 100644 --- a/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md +++ b/docs/book/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines.md @@ -132,7 +132,7 @@ python run.py An example of a generated YAML configuration template ```yaml -active_stack: Optional[str] +stack: Optional[str] build: Union[PipelineBuildBase, UUID, NoneType] enable_artifact_metadata: Optional[bool] enable_artifact_visualization: Optional[bool] diff --git a/src/zenml/config/pipeline_run_configuration.py b/src/zenml/config/pipeline_run_configuration.py index dcf1d454ac..21801dfa73 100644 --- a/src/zenml/config/pipeline_run_configuration.py +++ b/src/zenml/config/pipeline_run_configuration.py @@ -29,7 +29,7 @@ class PipelineRunConfiguration( ): """Class for pipeline run configurations.""" - active_stack: Optional[str] = None + stack: Optional[str] = None run_name: Optional[str] = None enable_cache: Optional[bool] = None enable_artifact_metadata: Optional[bool] = None diff --git a/tests/integration/functional/cli/test_pipeline.py b/tests/integration/functional/cli/test_pipeline.py index 3304b4fd51..ad507284ac 100644 --- a/tests/integration/functional/cli/test_pipeline.py +++ b/tests/integration/functional/cli/test_pipeline.py @@ -385,7 +385,7 @@ def test_pipeline_run_with_different_stack_in_config_file( config_path = tmp_path / "config.yaml" run_config = PipelineRunConfiguration( - run_name="custom_run_name", active_stack=str(new_stack.id) + run_name="custom_run_name", stack=str(new_stack.id) ) config_path.write_text(run_config.yaml()) From d07e90c1db6a5587e66e6ee8e59f03c90e437450 Mon Sep 17 00:00:00 2001 From: kabinja Date: Mon, 26 Feb 2024 20:43:11 +0100 Subject: [PATCH 26/27] replace stack_context to use temporary_active_stack --- src/zenml/new/pipelines/pipeline.py | 13 +++++++----- src/zenml/stack/utils.py | 33 +---------------------------- 2 files changed, 9 insertions(+), 37 deletions(-) diff --git a/src/zenml/new/pipelines/pipeline.py b/src/zenml/new/pipelines/pipeline.py index 944a19f1ee..96140e14a4 100644 --- a/src/zenml/new/pipelines/pipeline.py +++ b/src/zenml/new/pipelines/pipeline.py @@ -75,7 +75,6 @@ prepare_model_versions, ) from zenml.stack import Stack -from zenml.stack.utils import stack_context from zenml.steps import BaseStep from zenml.steps.entrypoint_function_utils import ( StepArtifact, @@ -537,9 +536,11 @@ def build( Returns: The build output. 
""" + from zenml.cli.utils import temporary_active_stack + with track_handler( event=AnalyticsEvent.BUILD_PIPELINE - ), stack_context(): + ), temporary_active_stack(): self._prepare_if_possible() deployment, pipeline_spec, _, _ = self._compile( config_path=config_path, @@ -607,6 +608,8 @@ def _run( Model of the pipeline run if running without a schedule, `None` if running with a schedule. """ + from zenml.cli.utils import temporary_active_stack + if constants.SHOULD_PREVENT_PIPELINE_EXECUTION: # An environment variable was set to stop the execution of # pipelines. This is done to prevent execution of module-level @@ -624,7 +627,7 @@ def _run( with track_handler( AnalyticsEvent.RUN_PIPELINE - ) as analytics_handler, stack_context(): + ) as analytics_handler, temporary_active_stack(): deployment, pipeline_spec, schedule, build = self._compile( config_path=config_path, run_name=run_name, @@ -1453,5 +1456,5 @@ def _update_stack_from_config( Args: run_configuration: The run configuration for this pipeline. """ - if run_configuration.active_stack is not None: - Client().activate_stack(run_configuration.active_stack) + if run_configuration.stack is not None: + Client().activate_stack(run_configuration.stack) diff --git a/src/zenml/stack/utils.py b/src/zenml/stack/utils.py index a8c37741f2..fa1cc7da77 100644 --- a/src/zenml/stack/utils.py +++ b/src/zenml/stack/utils.py @@ -13,8 +13,7 @@ # permissions and limitations under the License. """Util functions for handling stacks, components, and flavors.""" -from types import TracebackType -from typing import Any, Dict, Optional, Type +from typing import Any, Dict, Optional from zenml.client import Client from zenml.enums import StackComponentType, StoreType @@ -140,33 +139,3 @@ def get_flavor_by_name_and_type_from_zen_store( f"'{component_type}' exists." ) return flavors[0] - - -class stack_context: - """Context handler to reset the original active stack.""" - - def __init__(self) -> None: - """Constructor for stack_context saves active stack.""" - self._default_stack = Client().active_stack - - def __enter__(self) -> None: - """Enters in the stack context.""" - pass - - def __exit__( - self, - exception_type: Optional[Type[BaseException]], - exception_value: Optional[BaseException], - traceback: Optional[TracebackType], - ) -> None: - """Get a stack component flavor by name and type from a ZenStore. - - Args: - exception_type: Type of the exception that was raised. - None if no exception. - exception_value: Type of exception that was raised. - e.g., divide_by_zero error. None if no exception. - traceback: Traceback report. None if no exception. 
- """ - if self._default_stack.id != Client().active_stack.id: - Client().activate_stack(self._default_stack.id) From 951ea5bdfdeeb5a939950e51deacc815fef1f43a Mon Sep 17 00:00:00 2001 From: kabinja Date: Sun, 24 Mar 2024 16:16:54 +0100 Subject: [PATCH 27/27] temporary_active_stack --- src/zenml/cli/pipeline.py | 5 ++-- src/zenml/cli/utils.py | 31 ----------------------- src/zenml/new/pipelines/pipeline.py | 5 +--- src/zenml/stack/utils.py | 30 +++++++++++++++++++++- tests/integration/functional/cli/utils.py | 6 ++--- 5 files changed, 35 insertions(+), 42 deletions(-) diff --git a/src/zenml/cli/pipeline.py b/src/zenml/cli/pipeline.py index f667e8c4f0..4e9e34b887 100644 --- a/src/zenml/cli/pipeline.py +++ b/src/zenml/cli/pipeline.py @@ -34,6 +34,7 @@ ScheduleFilter, ) from zenml.new.pipelines.pipeline import Pipeline +from zenml.stack.utils import temporary_active_stack from zenml.utils import source_utils, uuid_utils from zenml.utils.yaml_utils import write_yaml @@ -184,7 +185,7 @@ def build_pipeline( name_id_or_prefix=pipeline_name_or_id, version=version ) - with cli_utils.temporary_active_stack(stack_name_or_id=stack_name_or_id): + with temporary_active_stack(stack_name_or_id=stack_name_or_id): pipeline_instance = Pipeline.from_model(pipeline_model) build = pipeline_instance.build(config_path=config_path) @@ -286,7 +287,7 @@ def run_pipeline( "or file path." ) - with cli_utils.temporary_active_stack(stack_name_or_id=stack_name_or_id): + with temporary_active_stack(stack_name_or_id=stack_name_or_id): pipeline_instance = Pipeline.from_model(pipeline_model) pipeline_instance = pipeline_instance.with_options( config_path=config_path, diff --git a/src/zenml/cli/utils.py b/src/zenml/cli/utils.py index 09cd25eb48..c6879f42e0 100644 --- a/src/zenml/cli/utils.py +++ b/src/zenml/cli/utils.py @@ -13,7 +13,6 @@ # permissions and limitations under the License. """Utility functions for the CLI.""" -import contextlib import datetime import json import os @@ -26,7 +25,6 @@ Any, Callable, Dict, - Iterator, List, NoReturn, Optional, @@ -78,8 +76,6 @@ from zenml.zen_server.deploy import ServerDeployment if TYPE_CHECKING: - from uuid import UUID - from rich.text import Text from zenml.enums import ExecutionStatus @@ -2481,33 +2477,6 @@ def wrapper(function: F) -> F: return inner_decorator -@contextlib.contextmanager -def temporary_active_stack( - stack_name_or_id: Union["UUID", str, None] = None, -) -> Iterator["Stack"]: - """Contextmanager to temporarily activate a stack. - - Args: - stack_name_or_id: The name or ID of the stack to activate. If not given, - this contextmanager will not do anything. - - Yields: - The active stack. - """ - from zenml.client import Client - - try: - if stack_name_or_id: - old_stack_id = Client().active_stack_model.id - Client().activate_stack(stack_name_or_id) - else: - old_stack_id = None - yield Client().active_stack - finally: - if old_stack_id: - Client().activate_stack(old_stack_id) - - def get_package_information( package_names: Optional[List[str]] = None, ) -> Dict[str, str]: diff --git a/src/zenml/new/pipelines/pipeline.py b/src/zenml/new/pipelines/pipeline.py index d02af933d2..c7803f2665 100644 --- a/src/zenml/new/pipelines/pipeline.py +++ b/src/zenml/new/pipelines/pipeline.py @@ -76,6 +76,7 @@ prepare_model_versions, ) from zenml.stack import Stack +from zenml.stack.utils import temporary_active_stack from zenml.steps import BaseStep from zenml.steps.entrypoint_function_utils import ( StepArtifact, @@ -537,8 +538,6 @@ def build( Returns: The build output. 
""" - from zenml.cli.utils import temporary_active_stack - with track_handler( event=AnalyticsEvent.BUILD_PIPELINE ), temporary_active_stack(): @@ -609,8 +608,6 @@ def _run( Model of the pipeline run if running without a schedule, `None` if running with a schedule. """ - from zenml.cli.utils import temporary_active_stack - if constants.SHOULD_PREVENT_PIPELINE_EXECUTION: # An environment variable was set to stop the execution of # pipelines. This is done to prevent execution of module-level diff --git a/src/zenml/stack/utils.py b/src/zenml/stack/utils.py index fa1cc7da77..6f41a64aca 100644 --- a/src/zenml/stack/utils.py +++ b/src/zenml/stack/utils.py @@ -13,13 +13,16 @@ # permissions and limitations under the License. """Util functions for handling stacks, components, and flavors.""" -from typing import Any, Dict, Optional +import contextlib +from typing import Any, Dict, Generator, Optional, Union +from uuid import UUID from zenml.client import Client from zenml.enums import StackComponentType, StoreType from zenml.logger import get_logger from zenml.models import FlavorFilter, FlavorResponse from zenml.stack.flavor import Flavor +from zenml.stack.stack import Stack from zenml.stack.stack_component import StackComponentConfig from zenml.zen_stores.base_zen_store import BaseZenStore @@ -139,3 +142,28 @@ def get_flavor_by_name_and_type_from_zen_store( f"'{component_type}' exists." ) return flavors[0] + + +@contextlib.contextmanager +def temporary_active_stack( + stack_name_or_id: Union[UUID, str, None] = None, +) -> Generator[Stack, Any, Any]: + """Contextmanager to temporarily activate a stack. + + Args: + stack_name_or_id: The name or ID of the stack to activate. If not given, + this contextmanager will not do anything. + + Yields: + The active stack. + """ + try: + if stack_name_or_id: + old_stack_id = Client().active_stack_model.id + Client().activate_stack(stack_name_or_id) + else: + old_stack_id = None + yield Client().active_stack + finally: + if old_stack_id: + Client().activate_stack(old_stack_id) diff --git a/tests/integration/functional/cli/utils.py b/tests/integration/functional/cli/utils.py index 228f6149b6..d4634ce395 100644 --- a/tests/integration/functional/cli/utils.py +++ b/tests/integration/functional/cli/utils.py @@ -16,10 +16,7 @@ from tests.harness.harness import TestHarness from zenml.cli import cli -from zenml.cli.utils import ( - parse_name_and_extra_arguments, - temporary_active_stack, -) +from zenml.cli.utils import parse_name_and_extra_arguments from zenml.client import Client from zenml.models import ( TagFilter, @@ -27,6 +24,7 @@ UserResponse, WorkspaceResponse, ) +from zenml.stack.utils import temporary_active_stack from zenml.utils.string_utils import random_str SAMPLE_CUSTOM_ARGUMENTS = [