From f8f113deece9fdeb537ec6fc30d26bd5dc6c2347 Mon Sep 17 00:00:00 2001 From: aparna_kesarkar Date: Wed, 21 Apr 2021 02:39:44 -0700 Subject: [PATCH 1/9] pass datasource object to compare_training_runs() to filter by datasource_name --- zenml/cli/pipeline.py | 4 ++-- zenml/repo/repo.py | 4 ++-- zenml/utils/post_training/compare.py | 12 ++++++++---- zenml/utils/post_training/post_training_utils.py | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/zenml/cli/pipeline.py b/zenml/cli/pipeline.py index 938e6fd3a2..7bbd1641f7 100644 --- a/zenml/cli/pipeline.py +++ b/zenml/cli/pipeline.py @@ -33,10 +33,10 @@ def pipeline(): @pipeline.command('compare') @pass_repo -def compare_training_runs(repo: Repository): +def compare_training_runs(repo: Repository, datasource=None): """Compares pipelines in repo""" click.echo('Comparing training pipelines in repo: Starting app..') - repo.compare_training_runs() + repo.compare_training_runs(datasource) @pipeline.command('list') diff --git a/zenml/repo/repo.py b/zenml/repo/repo.py index 2530a73a21..fc49b7ec7b 100644 --- a/zenml/repo/repo.py +++ b/zenml/repo/repo.py @@ -376,11 +376,11 @@ def load_pipeline_config(self, file_name: Text) -> Dict[Text, Any]: pipelines_dir = self.zenml_config.get_pipelines_dir() return yaml_utils.read_yaml(os.path.join(pipelines_dir, file_name)) - def compare_training_runs(self, port: int = 0): + def compare_training_runs(self, port: int = 0, datasource=None): """Launch the compare app for all training pipelines in repo""" from zenml.utils.post_training.post_training_utils import \ launch_compare_tool - launch_compare_tool(port) + launch_compare_tool(port, datasource) def clean(self): """Deletes associated metadata store, pipelines dir and artifacts""" diff --git a/zenml/utils/post_training/compare.py b/zenml/utils/post_training/compare.py index ad94342cce..3adcf5044c 100644 --- a/zenml/utils/post_training/compare.py +++ b/zenml/utils/post_training/compare.py @@ -31,16 +31,20 @@ class 
Application(param.Parameterized): slicing_metric_selector = param.ObjectSelector(default='', objects=['']) performance_metric_selector = param.ObjectSelector(objects=[]) - def __init__(self, **params): + def __init__(self, datasource, **params): super(Application, self).__init__(**params) # lists result_list = [] hparam_list = [] repo: Repository = Repository.get_instance() + datasource = datasource # get all pipelines in this workspace - all_pipelines: List[TrainingPipeline] = repo.get_pipelines_by_type([ + if datasource: + all_pipelines: List[TrainingPipeline] = repo.get_pipelines_by_datasource([datasource]) + else: + all_pipelines: List[TrainingPipeline] = repo.get_pipelines_by_type([ TrainingPipeline.PIPELINE_TYPE]) # get a dataframe of all results + all hyperparameter combinations @@ -177,8 +181,8 @@ def parameter_graph(self): return fig -def generate_interface(): - app = Application() +def generate_interface(datasource=None): + app = Application(datasource=datasource) handlers = pn.Param(app.param) # Analysis Page diff --git a/zenml/utils/post_training/post_training_utils.py b/zenml/utils/post_training/post_training_utils.py index 09cd937467..40c4ef5294 100644 --- a/zenml/utils/post_training/post_training_utils.py +++ b/zenml/utils/post_training/post_training_utils.py @@ -336,7 +336,7 @@ def evaluate_single_pipeline( os.system(f'jupyter notebook {final_out_path} --port {port}') -def launch_compare_tool(port: int = 0): +def launch_compare_tool(port: int = 0, datasource=None): """Launches `compare` tool for comparing multiple training pipelines.""" # assumes compare.py in the same folder template = \ From 54ad27e01cb710cec2f9ddc0322aec6199fc0520 Mon Sep 17 00:00:00 2001 From: aparna_kesarkar Date: Wed, 21 Apr 2021 12:38:26 -0700 Subject: [PATCH 2/9] pass datasource object to compare_training_runs() to filter by datasource_name --- zenml/utils/post_training/compare.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zenml/utils/post_training/compare.py 
b/zenml/utils/post_training/compare.py index 3adcf5044c..4645c183d9 100644 --- a/zenml/utils/post_training/compare.py +++ b/zenml/utils/post_training/compare.py @@ -172,6 +172,8 @@ def parameter_graph(self): cmax=max( final_col)), dimensions=new_dims)) + + else: fig = px.scatter(pd.DataFrame(), marginal_y='rug', From e8a2e5e2b8a0628daff90f982e410bdd563e43c6 Mon Sep 17 00:00:00 2001 From: aparna_kesarkar Date: Thu, 22 Apr 2021 03:21:23 -0700 Subject: [PATCH 3/9] styling changes resolved --- zenml/cli/pipeline.py | 3 ++- zenml/repo/repo.py | 3 ++- zenml/utils/post_training/compare.py | 4 ++-- zenml/utils/post_training/post_training_utils.py | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/zenml/cli/pipeline.py b/zenml/cli/pipeline.py index 7bbd1641f7..560c2fb8b9 100644 --- a/zenml/cli/pipeline.py +++ b/zenml/cli/pipeline.py @@ -23,6 +23,7 @@ from zenml.pipelines import TrainingPipeline from zenml.repo import Repository from zenml.utils.yaml_utils import read_yaml +from zenml.datasources import BaseDatasource @cli.group() @@ -33,7 +34,7 @@ def pipeline(): @pipeline.command('compare') @pass_repo -def compare_training_runs(repo: Repository, datasource=None): +def compare_training_runs(repo: Repository, datasource: BaseDatasource = None): """Compares pipelines in repo""" click.echo('Comparing training pipelines in repo: Starting app..') repo.compare_training_runs(datasource) diff --git a/zenml/repo/repo.py b/zenml/repo/repo.py index fc49b7ec7b..a8b09f3a8e 100644 --- a/zenml/repo/repo.py +++ b/zenml/repo/repo.py @@ -24,6 +24,7 @@ from zenml.repo.constants import ZENML_DIR_NAME from zenml.standards import standard_keys as keys from zenml.utils import path_utils, yaml_utils +from zenml.datasources import BaseDatasource from zenml.utils.analytics_utils import track, CREATE_REPO, GET_PIPELINES, \ GET_DATASOURCES, GET_STEPS_VERSIONS, \ REGISTER_PIPELINE, GET_STEP_VERSION @@ -376,7 +377,7 @@ def load_pipeline_config(self, file_name: Text) -> Dict[Text, 
Any]: pipelines_dir = self.zenml_config.get_pipelines_dir() return yaml_utils.read_yaml(os.path.join(pipelines_dir, file_name)) - def compare_training_runs(self, port: int = 0, datasource=None): + def compare_training_runs(self, port: int = 0, datasource: BaseDatasource = None): """Launch the compare app for all training pipelines in repo""" from zenml.utils.post_training.post_training_utils import \ launch_compare_tool diff --git a/zenml/utils/post_training/compare.py b/zenml/utils/post_training/compare.py index 4645c183d9..6743e63e2e 100644 --- a/zenml/utils/post_training/compare.py +++ b/zenml/utils/post_training/compare.py @@ -42,7 +42,9 @@ def __init__(self, datasource, **params): # get all pipelines in this workspace if datasource: + # filter pipeline by datasource all_pipelines: List[TrainingPipeline] = repo.get_pipelines_by_datasource([datasource]) + all_pipelines = [p for p in all_pipelines if p.PIPELINE_TYPE == TrainingPipeline.PIPELINE_TYPE] else: all_pipelines: List[TrainingPipeline] = repo.get_pipelines_by_type([ TrainingPipeline.PIPELINE_TYPE]) @@ -172,8 +174,6 @@ def parameter_graph(self): cmax=max( final_col)), dimensions=new_dims)) - - else: fig = px.scatter(pd.DataFrame(), marginal_y='rug', diff --git a/zenml/utils/post_training/post_training_utils.py b/zenml/utils/post_training/post_training_utils.py index 40c4ef5294..fccfd8c66d 100644 --- a/zenml/utils/post_training/post_training_utils.py +++ b/zenml/utils/post_training/post_training_utils.py @@ -34,6 +34,7 @@ from zenml.enums import GDPComponent from zenml.logger import get_logger from zenml.utils.path_utils import read_file_contents +from zenml.datasources import BaseDatasource logger = get_logger(__name__) @@ -336,7 +337,7 @@ def evaluate_single_pipeline( os.system(f'jupyter notebook {final_out_path} --port {port}') -def launch_compare_tool(port: int = 0, datasource=None): +def launch_compare_tool(port: int = 0, datasource: BaseDatasource = None): """Launches `compare` tool for comparing 
multiple training pipelines.""" # assumes compare.py in the same folder template = \ From 9331f65ecf14784a55ef5d0237adf97c52057d9b Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Thu, 22 Apr 2021 14:40:00 +0200 Subject: [PATCH 4/9] changed how compare tool is launched --- zenml/utils/post_training/compare.py | 26 ++++++------- .../post_training/post_training_utils.py | 39 +++---------------- 2 files changed, 18 insertions(+), 47 deletions(-) diff --git a/zenml/utils/post_training/compare.py b/zenml/utils/post_training/compare.py index 6743e63e2e..197483d79f 100644 --- a/zenml/utils/post_training/compare.py +++ b/zenml/utils/post_training/compare.py @@ -7,9 +7,10 @@ import plotly.graph_objects as go import tensorflow_model_analysis as tfma +from zenml.datasources import BaseDatasource +from zenml.enums import PipelineStatusTypes, GDPComponent from zenml.pipelines import TrainingPipeline from zenml.repo import Repository -from zenml.enums import PipelineStatusTypes, GDPComponent pn.extension('plotly') @@ -31,23 +32,26 @@ class Application(param.Parameterized): slicing_metric_selector = param.ObjectSelector(default='', objects=['']) performance_metric_selector = param.ObjectSelector(objects=[]) - def __init__(self, datasource, **params): + def __init__(self, datasource: BaseDatasource = None, **params): super(Application, self).__init__(**params) # lists result_list = [] hparam_list = [] repo: Repository = Repository.get_instance() - datasource = datasource + self.datasource = datasource # get all pipelines in this workspace if datasource: - # filter pipeline by datasource - all_pipelines: List[TrainingPipeline] = repo.get_pipelines_by_datasource([datasource]) - all_pipelines = [p for p in all_pipelines if p.PIPELINE_TYPE == TrainingPipeline.PIPELINE_TYPE] + # filter pipeline by datasource, and then the training ones + all_pipelines: List[TrainingPipeline] = \ + repo.get_pipelines_by_datasource(datasource) + all_pipelines = [p for p in all_pipelines if + 
p.PIPELINE_TYPE == TrainingPipeline.PIPELINE_TYPE] else: - all_pipelines: List[TrainingPipeline] = repo.get_pipelines_by_type([ - TrainingPipeline.PIPELINE_TYPE]) + all_pipelines: List[TrainingPipeline] = repo.get_pipelines_by_type( + [ + TrainingPipeline.PIPELINE_TYPE]) # get a dataframe of all results + all hyperparameter combinations for p in all_pipelines: @@ -183,7 +187,7 @@ def parameter_graph(self): return fig -def generate_interface(datasource=None): +def generate_interface(datasource: BaseDatasource = None): app = Application(datasource=datasource) handlers = pn.Param(app.param) @@ -201,7 +205,3 @@ def generate_interface(datasource=None): ('Analysis Page', analysis_page), ) return interface - - -platform = generate_interface() -platform.servable() diff --git a/zenml/utils/post_training/post_training_utils.py b/zenml/utils/post_training/post_training_utils.py index fccfd8c66d..648c06dda3 100644 --- a/zenml/utils/post_training/post_training_utils.py +++ b/zenml/utils/post_training/post_training_utils.py @@ -21,6 +21,7 @@ import click import nbformat as nbf import pandas as pd +import panel import panel as pn import tensorflow as tf import tensorflow_data_validation as tfdv @@ -29,12 +30,12 @@ from tensorflow_transform.tf_metadata import schema_utils from tfx.utils import io_utils -from zenml.constants import APP_NAME, EVALUATION_NOTEBOOK, \ - COMPARISON_NOTEBOOK +from zenml.constants import APP_NAME, EVALUATION_NOTEBOOK +from zenml.datasources import BaseDatasource from zenml.enums import GDPComponent from zenml.logger import get_logger from zenml.utils.path_utils import read_file_contents -from zenml.datasources import BaseDatasource +from zenml.utils.post_training.compare import generate_interface logger = get_logger(__name__) @@ -339,34 +340,4 @@ def evaluate_single_pipeline( def launch_compare_tool(port: int = 0, datasource: BaseDatasource = None): """Launches `compare` tool for comparing multiple training pipelines.""" - # assumes compare.py in the same 
folder - template = \ - os.path.join(os.path.abspath(os.path.dirname(__file__)), 'compare.py') - compare_cell = read_file_contents(template) - - # generate notebook - nb = nbf.v4.new_notebook() - nb['cells'] = [ - nbf.v4.new_code_cell(compare_cell), - ] - - # TODO: [LOW] Check if we can centralize this along with the one used in - # evaluate_single_pipeline() - config_folder = click.get_app_dir(APP_NAME) - if not (os.path.exists(config_folder) and os.path.isdir( - config_folder)): - os.makedirs(config_folder) - - final_out_path = os.path.join(config_folder, COMPARISON_NOTEBOOK) - s = nbf.writes(nb) - if isinstance(s, bytes): - s = s.decode('utf8') - - with open(final_out_path, 'w') as f: - f.write(s) - - # serve notebook - if port == 0: - os.system('panel serve "{}" --show'.format(final_out_path)) - else: - os.system(f'panel serve "{final_out_path}" --port {port} --show') + panel.serve(generate_interface(datasource), port=port) From 710da673dcb8eeace880c4260c2ca850d9fddcc7 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Thu, 22 Apr 2021 14:52:58 +0200 Subject: [PATCH 5/9] circular dependency removed --- zenml/repo/repo.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/zenml/repo/repo.py b/zenml/repo/repo.py index a8b09f3a8e..fc49b7ec7b 100644 --- a/zenml/repo/repo.py +++ b/zenml/repo/repo.py @@ -24,7 +24,6 @@ from zenml.repo.constants import ZENML_DIR_NAME from zenml.standards import standard_keys as keys from zenml.utils import path_utils, yaml_utils -from zenml.datasources import BaseDatasource from zenml.utils.analytics_utils import track, CREATE_REPO, GET_PIPELINES, \ GET_DATASOURCES, GET_STEPS_VERSIONS, \ REGISTER_PIPELINE, GET_STEP_VERSION @@ -377,7 +376,7 @@ def load_pipeline_config(self, file_name: Text) -> Dict[Text, Any]: pipelines_dir = self.zenml_config.get_pipelines_dir() return yaml_utils.read_yaml(os.path.join(pipelines_dir, file_name)) - def compare_training_runs(self, port: int = 0, datasource: BaseDatasource = None): + def 
compare_training_runs(self, port: int = 0, datasource=None): """Launch the compare app for all training pipelines in repo""" from zenml.utils.post_training.post_training_utils import \ launch_compare_tool From a418a31c1dfe5d230e9f58612adba070dfeca2d7 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Thu, 22 Apr 2021 15:00:58 +0200 Subject: [PATCH 6/9] found another circular dependency --- zenml/utils/post_training/post_training_utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/zenml/utils/post_training/post_training_utils.py b/zenml/utils/post_training/post_training_utils.py index 648c06dda3..ebffb89b39 100644 --- a/zenml/utils/post_training/post_training_utils.py +++ b/zenml/utils/post_training/post_training_utils.py @@ -31,7 +31,6 @@ from tfx.utils import io_utils from zenml.constants import APP_NAME, EVALUATION_NOTEBOOK -from zenml.datasources import BaseDatasource from zenml.enums import GDPComponent from zenml.logger import get_logger from zenml.utils.path_utils import read_file_contents @@ -338,6 +337,12 @@ def evaluate_single_pipeline( os.system(f'jupyter notebook {final_out_path} --port {port}') -def launch_compare_tool(port: int = 0, datasource: BaseDatasource = None): - """Launches `compare` tool for comparing multiple training pipelines.""" +def launch_compare_tool(port: int = 0, datasource=None): + """Launches `compare` tool for comparing multiple training pipelines. + + Args: + port: Port to launch application on. + datasource (BaseDatasource): object of type BaseDatasource, to + filter only pipelines using that particular datasource. 
+ """ panel.serve(generate_interface(datasource), port=port) From 4dfe786b0d98b81f036d76dc4f9a86236aadedf6 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Thu, 22 Apr 2021 15:07:03 +0200 Subject: [PATCH 7/9] BaseDatasource causes circular dependencies so no type hints possible --- zenml/utils/post_training/compare.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/zenml/utils/post_training/compare.py b/zenml/utils/post_training/compare.py index 197483d79f..01175bb29f 100644 --- a/zenml/utils/post_training/compare.py +++ b/zenml/utils/post_training/compare.py @@ -7,7 +7,6 @@ import plotly.graph_objects as go import tensorflow_model_analysis as tfma -from zenml.datasources import BaseDatasource from zenml.enums import PipelineStatusTypes, GDPComponent from zenml.pipelines import TrainingPipeline from zenml.repo import Repository @@ -32,7 +31,7 @@ class Application(param.Parameterized): slicing_metric_selector = param.ObjectSelector(default='', objects=['']) performance_metric_selector = param.ObjectSelector(objects=[]) - def __init__(self, datasource: BaseDatasource = None, **params): + def __init__(self, datasource=None, **params): super(Application, self).__init__(**params) # lists @@ -187,7 +186,7 @@ def parameter_graph(self): return fig -def generate_interface(datasource: BaseDatasource = None): +def generate_interface(datasource=None): app = Application(datasource=datasource) handlers = pn.Param(app.param) From ed2a1480d75d82c05cf14c9396e61b7b8632df07 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Thu, 22 Apr 2021 15:43:42 +0200 Subject: [PATCH 8/9] took out datasource object passing from cli, we have to solve that another way --- zenml/cli/pipeline.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/zenml/cli/pipeline.py b/zenml/cli/pipeline.py index 560c2fb8b9..938e6fd3a2 100644 --- a/zenml/cli/pipeline.py +++ b/zenml/cli/pipeline.py @@ -23,7 +23,6 @@ from zenml.pipelines import TrainingPipeline from zenml.repo 
import Repository from zenml.utils.yaml_utils import read_yaml -from zenml.datasources import BaseDatasource @cli.group() @@ -34,10 +33,10 @@ def pipeline(): @pipeline.command('compare') @pass_repo -def compare_training_runs(repo: Repository, datasource: BaseDatasource = None): +def compare_training_runs(repo: Repository): """Compares pipelines in repo""" click.echo('Comparing training pipelines in repo: Starting app..') - repo.compare_training_runs(datasource) + repo.compare_training_runs() @pipeline.command('list') From dd3697a6c3de24639e3fd596c0ad978805534573 Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Thu, 22 Apr 2021 15:52:11 +0200 Subject: [PATCH 9/9] imports --- zenml/utils/post_training/post_training_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenml/utils/post_training/post_training_utils.py b/zenml/utils/post_training/post_training_utils.py index ebffb89b39..0225e3bc18 100644 --- a/zenml/utils/post_training/post_training_utils.py +++ b/zenml/utils/post_training/post_training_utils.py @@ -34,7 +34,6 @@ from zenml.enums import GDPComponent from zenml.logger import get_logger from zenml.utils.path_utils import read_file_contents -from zenml.utils.post_training.compare import generate_interface logger = get_logger(__name__) @@ -345,4 +344,5 @@ def launch_compare_tool(port: int = 0, datasource=None): datasource (BaseDatasource): object of type BaseDatasource, to filter only pipelines using that particular datasource. """ + from zenml.utils.post_training.compare import generate_interface panel.serve(generate_interface(datasource), port=port)