From 28dd2bf771df1d702bb92e03802e6b59a28fcf6e Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Fri, 9 Sep 2022 11:12:45 +0200 Subject: [PATCH] Kubeflow workaround added (#886) * Kubeflow workaround added * Add missing host Co-authored-by: Michael Schuster --- .../mlops-stacks/orchestrators/kubeflow.md | 79 ++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/docs/book/mlops-stacks/orchestrators/kubeflow.md b/docs/book/mlops-stacks/orchestrators/kubeflow.md index 4ac20bfcf9..57cc9500f9 100644 --- a/docs/book/mlops-stacks/orchestrators/kubeflow.md +++ b/docs/book/mlops-stacks/orchestrators/kubeflow.md @@ -198,4 +198,81 @@ A concrete example of using the Kubeflow orchestrator can be found [here](https://github.com/zenml-io/zenml/tree/main/examples/kubeflow_pipelines_orchestration). For more information and a full list of configurable attributes of the Kubeflow orchestrator, check out the -[API Docs](https://apidocs.zenml.io/latest/api_docs/integrations/#zenml.integrations.kubeflow.orchestrators.kubeflow_orchestrator.KubeflowOrchestrator). \ No newline at end of file +[API Docs](https://apidocs.zenml.io/latest/api_docs/integrations/#zenml.integrations.kubeflow.orchestrators.kubeflow_orchestrator.KubeflowOrchestrator). + +## Important Note for Multi-Tenancy Deployments + +Kubeflow has a notion of [multi-tenancy](https://www.kubeflow.org/docs/components/multi-tenancy/overview/) +built into its deployment. Kubeflow’s multi-user isolation simplifies user +operations because each user only views and edits the Kubeflow components +and model artifacts defined in their configuration. + +Currently, the default ZenML Kubeflow orchestrator yields the following error +when running a pipeline: + +```shell +HTTP response body: {"error":"Invalid input error: Invalid resource references for experiment. ListExperiment requires filtering by namespace.","code":3,"message":"Invalid input error: Invalid resource references for experiment. 
ListExperiment requires filtering by +namespace.","details":[{"@type":"type.googleapis.com/api.Error","error_message":"Invalid resource references for experiment. ListExperiment requires filtering by namespace.","error_details":"Invalid input error: Invalid resource references for experiment. ListExperiment requires filtering by namespace."}]} +``` + +The current workaround is as follows: + +```python +import json +import os +import kfp + +NAMESPACE = "namespace_name" # set this +USERNAME = "foo" # set this +PASSWORD = "bar" # set this +HOST = "https://qux.com" # set this +KFP_CONFIG = '~/.config/kfp/context.json' # set this manually if you'd like + +def get_kfp_token(username: str, password: str) -> str: + """Get token for kubeflow authentication.""" + session = requests.Session() + response = session.get(HOST) + headers = { + "Content-Type": "application/x-www-form-urlencoded", + } + data = {"login": username, "password": password} + session.post(response.url, headers=headers, data=data) + session_cookie = session.cookies.get_dict()["authservice_session"] + return session_cookie + +token = get_kfp_token() +cookies = 'authservice_session=' + token + +# 1: Set user namespace globally +kfp.Client(host=HOST, cookies=cookies).set_user_namespace(NAMESPACE) + +# 2: Set cookie globally in the kfp config file +with open(KFP_CONFIG, 'r') as f: + data = json.load(f) + data['client_authentication_cookie'] = cookies + +os.remove(KFP_CONFIG) +with open(KFP_CONFIG, 'w') as f: + json.dump(data, f) + +# Continue with your normal pipeline code.. +``` + +Please note that the `HOST` used in the above code must also be set when registering the orchestrator, +with the `kubeflow_hostname` parameter: + +``` +export HOST=https://qux.com +zenml orchestrator register multi_tenant_kf --flavor=kubeflow \ + --kubeflow_hostname=$(HOST)/pipeline # /pipeline is important! + --other_params.. +``` +In future ZenML versions, multi-tenancy will be natively supported. 
See this +[Slack thread](https://zenml.slack.com/archives/C01FWQ5D0TT/p1662545810395779) for more details +on how the above workaround came about. + +Please note that all of the above serves only to initialize the `kfp.Client()` class in the standard orchestrator logic. +This code can be seen [here](https://github.com/zenml-io/zenml/blob/main/src/zenml/integrations/kubeflow/orchestrators/kubeflow_orchestrator.py#L709). + +You can simply override this logic and add your custom authentication scheme if needed. Read [here](custom.md) +for more details on how to create a custom orchestrator. \ No newline at end of file