From d82a3bc11eff95c57f6a3ef6905524c8a14ba117 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Thu, 26 Dec 2024 13:16:09 -0800 Subject: [PATCH 01/18] minor cleanup to cli --- src/easylink/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/easylink/cli.py b/src/easylink/cli.py index 79bbfeec..0218fc6e 100644 --- a/src/easylink/cli.py +++ b/src/easylink/cli.py @@ -97,7 +97,7 @@ def run( logger.info("Running pipeline") results_dir = get_results_directory(output_dir, no_timestamp).as_posix() logger.info(f"Results directory: {results_dir}") - # TODO [MIC-4493]: Add configuration validation + # TODO [MIC-4493]: Add configuration validation`` main = handle_exceptions( func=runner.main, exceptions_logger=logger, with_debugger=with_debugger From c0e70094f6d2910bf2b6a790612914a3f20a176b Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Fri, 27 Dec 2024 12:30:44 -0800 Subject: [PATCH 02/18] strengthen configuration.py docs --- src/easylink/configuration.py | 1 + src/easylink/utilities/general_utils.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/src/easylink/configuration.py b/src/easylink/configuration.py index d567b2d0..543e1daa 100644 --- a/src/easylink/configuration.py +++ b/src/easylink/configuration.py @@ -114,6 +114,7 @@ def __init__( self.update({"schema": self._get_schema(potential_schemas)}, layer="initial_data") self.schema.configure_pipeline(self.pipeline, self.input_data) self._validate() + breakpoint() self.freeze() @property diff --git a/src/easylink/utilities/general_utils.py b/src/easylink/utilities/general_utils.py index f5c033f2..a5806b57 100644 --- a/src/easylink/utilities/general_utils.py +++ b/src/easylink/utilities/general_utils.py @@ -93,6 +93,10 @@ def exit_with_validation_error(error_msg: dict) -> None: YAML format and terminates the program execution with a non-zero exit code (indicating an error). 
+ This function logs the provided validation error messages using a structured + YAML format and terminates the program execution with a non-zero exit code + (indicating an error). + Parameters ---------- error_msg From 9a20e381b38b101a7aebd37e49c82964180b8a71 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Mon, 30 Dec 2024 12:06:24 -0800 Subject: [PATCH 03/18] fixes to broken doc-builds; minor docstring tweaks --- docs/source/api_reference/index.rst | 1 + docs/source/api_reference/pipeline_graph.rst | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/source/api_reference/index.rst b/docs/source/api_reference/index.rst index 8840e798..f486d914 100644 --- a/docs/source/api_reference/index.rst +++ b/docs/source/api_reference/index.rst @@ -2,6 +2,7 @@ API Reference ============= .. automodule:: easylink + :show-inheritance: .. toctree:: :maxdepth: 1 diff --git a/docs/source/api_reference/pipeline_graph.rst b/docs/source/api_reference/pipeline_graph.rst index 8476e95b..64072603 100644 --- a/docs/source/api_reference/pipeline_graph.rst +++ b/docs/source/api_reference/pipeline_graph.rst @@ -1 +1,2 @@ ..
automodule:: easylink.pipeline_graph + :show-inheritance: \ No newline at end of file From b35a54d2a6c28bc42a209648dd09897c02cf21e6 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Mon, 30 Dec 2024 12:18:18 -0800 Subject: [PATCH 04/18] remove breakpoints --- src/easylink/configuration.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/easylink/configuration.py b/src/easylink/configuration.py index 543e1daa..d567b2d0 100644 --- a/src/easylink/configuration.py +++ b/src/easylink/configuration.py @@ -114,7 +114,6 @@ def __init__( self.update({"schema": self._get_schema(potential_schemas)}, layer="initial_data") self.schema.configure_pipeline(self.pipeline, self.input_data) self._validate() - breakpoint() self.freeze() @property From 781b97178fbbce172fab0c3c509c2ca25cb73bc1 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Mon, 30 Dec 2024 13:04:03 -0800 Subject: [PATCH 05/18] show class inheritance --- docs/source/api_reference/index.rst | 1 - docs/source/api_reference/pipeline_graph.rst | 1 - 2 files changed, 2 deletions(-) diff --git a/docs/source/api_reference/index.rst b/docs/source/api_reference/index.rst index f486d914..8840e798 100644 --- a/docs/source/api_reference/index.rst +++ b/docs/source/api_reference/index.rst @@ -2,7 +2,6 @@ API Reference ============= .. automodule:: easylink - :show-inheritance: .. toctree:: :maxdepth: 1 diff --git a/docs/source/api_reference/pipeline_graph.rst b/docs/source/api_reference/pipeline_graph.rst index 64072603..8476e95b 100644 --- a/docs/source/api_reference/pipeline_graph.rst +++ b/docs/source/api_reference/pipeline_graph.rst @@ -1,2 +1 @@ .. 
automodule:: easylink.pipeline_graph - :show-inheritance: \ No newline at end of file From e9d804b05a346ec18fcb02ac9539e5842a9f2235 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Tue, 31 Dec 2024 13:41:38 -0800 Subject: [PATCH 06/18] strengthen graph_components.py docstrings --- src/easylink/graph_components.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/easylink/graph_components.py b/src/easylink/graph_components.py index 8fe3c9aa..26bbbd8b 100644 --- a/src/easylink/graph_components.py +++ b/src/easylink/graph_components.py @@ -39,8 +39,8 @@ class InputSlot: name: str """The name of the input slot.""" env_var: str | None - """The environment variable that this input slot will use to pass a list of data filepaths - to an Implementation.""" + """The environment variable that this input slot will use to pass a list of + data filepaths to an Implementation.""" validator: Callable[[str], None] """A callable that validates the input data being passed into the pipeline via this input slot. 
If the data is invalid, the callable should raise an exception From 550906705aacc8d82e7da6dde4e46bcc0287462d Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Thu, 2 Jan 2025 16:34:01 -0800 Subject: [PATCH 07/18] strengthen implementation.py docstrings --- src/easylink/implementation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/easylink/implementation.py b/src/easylink/implementation.py index 291efb45..2dd52677 100644 --- a/src/easylink/implementation.py +++ b/src/easylink/implementation.py @@ -64,7 +64,8 @@ def __init__( self.schema_steps = schema_steps """The requested :class:`~easylink.pipeline_schema.PipelineSchema` :class:`~easylink.step.Step` names for which this Implementation is - requested to be responsible in the pipeline.""" + expected to be responsible.""" + self.requires_spark = self._metadata.get("requires_spark", False) """Whether this Implementation requires a Spark environment.""" From 7f06ea35095331357775b95f06ad254b282db60f Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Thu, 2 Jan 2025 16:40:37 -0800 Subject: [PATCH 08/18] typo from bad merge conflict resolution --- src/easylink/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/easylink/cli.py b/src/easylink/cli.py index 0218fc6e..79bbfeec 100644 --- a/src/easylink/cli.py +++ b/src/easylink/cli.py @@ -97,7 +97,7 @@ def run( logger.info("Running pipeline") results_dir = get_results_directory(output_dir, no_timestamp).as_posix() logger.info(f"Results directory: {results_dir}") - # TODO [MIC-4493]: Add configuration validation`` + # TODO [MIC-4493]: Add configuration validation main = handle_exceptions( func=runner.main, exceptions_logger=logger, with_debugger=with_debugger From 50ac91f12154d58ee5ee57e82e61d1cdb33d3bd5 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Tue, 7 Jan 2025 08:47:56 -0800 Subject: [PATCH 09/18] various other pr fixes --- src/easylink/implementation.py | 2 +- src/easylink/utilities/general_utils.py | 4 
---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/easylink/implementation.py b/src/easylink/implementation.py index 2dd52677..ded13e60 100644 --- a/src/easylink/implementation.py +++ b/src/easylink/implementation.py @@ -138,7 +138,7 @@ def outputs(self) -> dict[str, list[str]]: class NullImplementation: - """An partial :class:`Implementation` interface that represents that no container needs to run. + """A partial :class:`Implementation` interface that represents that no container needs to run. The primary use case for this class is when adding an :class:`~easylink.step.IOStep` - which does not have a corresponding :class:`Implementation` - to an diff --git a/src/easylink/utilities/general_utils.py b/src/easylink/utilities/general_utils.py index a5806b57..f5c033f2 100644 --- a/src/easylink/utilities/general_utils.py +++ b/src/easylink/utilities/general_utils.py @@ -93,10 +93,6 @@ def exit_with_validation_error(error_msg: dict) -> None: YAML format and terminates the program execution with a non-zero exit code (indicating an error). - This function logs the provided validation error messages using a structured - YAML format and terminates the program execution with a non-zero exit code - (indicating an error). - Parameters ---------- error_msg From 54a0187ae45764139616f315bfd8f1fcd4281174 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Tue, 7 Jan 2025 14:59:45 -0800 Subject: [PATCH 10/18] clean up configuration.py --- src/easylink/configuration.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/easylink/configuration.py b/src/easylink/configuration.py index d567b2d0..256172d7 100644 --- a/src/easylink/configuration.py +++ b/src/easylink/configuration.py @@ -52,9 +52,8 @@ class Config(LayeredConfigTree): """A container for configuration information. 
- A ``Config`` (which inherits from :class:`~layered_config_tree.LayeredConfigTree`) - is a container that includes the combination of the user-provided pipeline, - input data, and computing environment specifications. It is a nested + A ``Config`` is a container that includes the combination of the user-provided + pipeline, input data, and computing environment specifications. It is a nested dictionary-like object that supports prioritized layers of configuration settings as well as dot-notation access to its attributes. @@ -89,9 +88,9 @@ class Config(LayeredConfigTree): Notes ----- The requested pipeline is checked against a set of supported - :class:`pipeline schemas `. The first - schema that successfully validates is assumed to be the correct one and is attached - to the ``Config`` object and its :meth:`~easylink.pipeline_schema.PipelineSchema.configure_pipeline` + ``PipelineSchemas``. The first schema that successfully validates is assumed + to be the correct one and is attached to the ``Config`` object and its + :meth:`~easylink.pipeline_schema.PipelineSchema.configure_pipeline` method is called. """ @@ -175,12 +174,12 @@ def spark_resources(self) -> dict[str, Any]: ################# def _get_schema(self, potential_schemas: list[PipelineSchema]) -> PipelineSchema: - """Returns the first pipeline schema that successfully validates the requested pipeline. + """Returns the first ``PipelineSchema`` that successfully validates the requested pipeline. Parameters ---------- potential_schemas - Pipeline schemas to validate the pipeline configuration against. + ``PipelineSchemas`` to validate the pipeline configuration against. Returns ------- @@ -191,10 +190,10 @@ def _get_schema(self, potential_schemas: list[PipelineSchema]) -> PipelineSchema Notes ----- This acts as the pipeline configuration file's validation method since - we can only find a matching schema if that file is valid. + we can only find a matching ``PipelineSchema`` if that file is valid. 
- This method returns the first schema that successfully validates and does - not attempt to validate additional ones. + This method returns the first ``PipelineSchema`` that successfully validates + and does not attempt to validate additional ones. """ errors = defaultdict(dict) # Try each schema until one is validated @@ -279,7 +278,7 @@ def load_params_from_specification( This gathers the pipeline, input data, and computing environment specifications as well as the results directory into a single dictionary for insertion into - the ``Config`` object. + the :class:`Config` object. Parameters ---------- From f86783f23d48ada6fb10fc69a7e0fd96518a949b Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Tue, 7 Jan 2025 15:14:56 -0800 Subject: [PATCH 11/18] clean up implementation.py --- src/easylink/configuration.py | 8 +-- src/easylink/implementation.py | 114 +++++++++++++++------------------ 2 files changed, 55 insertions(+), 67 deletions(-) diff --git a/src/easylink/configuration.py b/src/easylink/configuration.py index 256172d7..c95e14ad 100644 --- a/src/easylink/configuration.py +++ b/src/easylink/configuration.py @@ -52,7 +52,7 @@ class Config(LayeredConfigTree): """A container for configuration information. - A ``Config`` is a container that includes the combination of the user-provided + A ``Config`` is a container that includes the combination of the user-provided pipeline, input data, and computing environment specifications. It is a nested dictionary-like object that supports prioritized layers of configuration settings as well as dot-notation access to its attributes. @@ -88,8 +88,8 @@ class Config(LayeredConfigTree): Notes ----- The requested pipeline is checked against a set of supported - ``PipelineSchemas``. The first schema that successfully validates is assumed - to be the correct one and is attached to the ``Config`` object and its + ``PipelineSchemas``. 
The first schema that successfully validates is assumed + to be the correct one and is attached to the ``Config`` object and its :meth:`~easylink.pipeline_schema.PipelineSchema.configure_pipeline` method is called. """ @@ -192,7 +192,7 @@ def _get_schema(self, potential_schemas: list[PipelineSchema]) -> PipelineSchema This acts as the pipeline configuration file's validation method since we can only find a matching ``PipelineSchema`` if that file is valid. - This method returns the first ``PipelineSchema`` that successfully validates + This method returns the first ``PipelineSchema`` that successfully validates and does not attempt to validate additional ones. """ errors = defaultdict(dict) diff --git a/src/easylink/implementation.py b/src/easylink/implementation.py index ded13e60..69898514 100644 --- a/src/easylink/implementation.py +++ b/src/easylink/implementation.py @@ -22,22 +22,21 @@ class Implementation: """A representation of an actual container that will be executed for a :class:`~easylink.step.Step`. - Implementations exist at a lower level than :class:`Steps`. - This class contains information about what container to use, what environment - variables to set inside the container, and some metadata about the container. + ``Implementations`` exist at a lower level than Steps. This class contains + information about what container to use, what environment variables to set + inside the container, and some metadata about the container. Parameters ---------- schema_steps - The requested :class:`~easylink.pipeline_schema.PipelineSchema` - :class:`~easylink.step.Step` names for which this Implementation is - expected to be responsible. + The requested Step names for which this ``Implementation`` is expected to + be responsible. implementation_config - The configuration for this Implementation. + The configuration for this ``Implementation``. input_slots - The :class:`InputSlots` for this Implementation. + The :class:`InputSlots` for this ``Implementation``. 
output_slots - The :class:`OutputSlots` for this Implementation. + The :class:`OutputSlots` for this ``Implementation``. """ def __init__( @@ -48,32 +47,28 @@ def __init__( output_slots: Iterable["OutputSlot"] = (), ): self.name = implementation_config.name - """The name of this Implementation.""" + """The name of this ``Implementation``.""" self.input_slots = {slot.name: slot for slot in input_slots} - """A mapping of :class:`InputSlots` - names to their instances.""" + """A mapping of ``InputSlot`` names to their instances.""" self.output_slots = {slot.name: slot for slot in output_slots} - """A mapping of :class:`OutputSlots` - names to their instances.""" + """A mapping of ``OutputSlot`` names to their instances.""" self._metadata = self._load_metadata() self.environment_variables = self._get_env_vars(implementation_config) """A mapping of environment variables to set.""" self.metadata_steps = self._metadata["steps"] - """The names of the specific :class:`Steps` for which - this Implementation is responsible.""" + """The names of the specific ``Steps`` for which this ``Implementation`` + is responsible to implement.""" self.schema_steps = schema_steps - """The requested :class:`~easylink.pipeline_schema.PipelineSchema` - :class:`~easylink.step.Step` names for which this Implementation is - expected to be responsible.""" - + """The *user-requested* ``Step`` names for which this ``Implementation`` + is responsible to implement.""" self.requires_spark = self._metadata.get("requires_spark", False) - """Whether this Implementation requires a Spark environment.""" + """Whether this ``Implementation`` requires a Spark environment.""" def __repr__(self) -> str: return f"Implementation.{self.name}" def validate(self) -> list[str]: - """Validates individual Implementation instances. + """Validates individual ``Implementation`` instances. 
Returns ------- @@ -95,12 +90,12 @@ def validate(self) -> list[str]: ################## def _load_metadata(self) -> dict[str, str]: - """Loads the metadata for this Implementation instance.""" + """Loads the metadata for this ``Implementation`` instance.""" metadata = load_yaml(paths.IMPLEMENTATION_METADATA) return metadata[self.name] def _validate_expected_steps(self, logs: list[str]) -> list[str]: - """Validates that the Implementation is responsible for the correct steps.""" + """Validates that the ``Implementation`` is responsible for the correct steps.""" if not set(self.schema_steps) == set(self.metadata_steps): logs.append( f"Pipeline configuration nodes {self.schema_steps} do not match " @@ -109,51 +104,51 @@ def _validate_expected_steps(self, logs: list[str]) -> list[str]: return logs def _validate_container_exists(self, logs: list[str]) -> list[str]: - """Validates that the container for this Implementation exists.""" + """Validates that the container for this ``Implementation`` exists.""" err_str = f"Container '{self.singularity_image_path}' does not exist." 
if not Path(self.singularity_image_path).exists(): logs.append(err_str) return logs def _get_env_vars(self, implementation_config: LayeredConfigTree) -> dict[str, str]: - """Gets the environment variables relevant to this Implementation.""" + """Gets the environment variables relevant to this ``Implementation``.""" env_vars = self._metadata.get("env", {}) env_vars.update(implementation_config.get("configuration", {})) return env_vars @property def singularity_image_path(self) -> str: - """The path to the Singularity image for this Implementation.""" + """The path to the Singularity image for this ``Implementation``.""" return self._metadata["image_path"] @property def script_cmd(self) -> str: - """The command to run inside of the container for this Implementation.""" + """The command to run inside of the container for this ``Implementation``.""" return self._metadata["script_cmd"] @property def outputs(self) -> dict[str, list[str]]: - """The outputs expected from this Implementation.""" + """The outputs expected from this ``Implementation``.""" return self._metadata["outputs"] class NullImplementation: """A partial :class:`Implementation` interface that represents that no container needs to run. - The primary use case for this class is when adding an :class:`~easylink.step.IOStep` - - which does not have a corresponding :class:`Implementation` - to an - :class:`~easylink.graph_components.ImplementationGraph` since adding any new - node requires an object with :class:`~easylink.graph_components.InputSlot` + The primary use case for this class is when adding an + :class:`~easylink.step.IOStep` - which does not have a corresponding + ``Implementation`` - to an :class:`~easylink.graph_components.ImplementationGraph` + since adding any new node requires an object with :class:`~easylink.graph_components.InputSlot` and :class:`~easylink.graph_components.OutputSlot` names. Parameters ---------- name - The name of this NullImplementation. 
+ The name of this ``NullImplementation``. input_slots - The :class:`InputSlots` for this NullImplementation. + The ``InputSlots`` for this ``NullImplementation``. output_slots - The :class:`OutputSlots` for this NullImplementation. + The ``OutputSlots`` for this ``NullImplementation``. """ def __init__( @@ -163,33 +158,30 @@ def __init__( output_slots: Iterable["OutputSlot"] = (), ): self.name = name - """The name of this NullImplementation.""" + """The name of this ``NullImplementation``.""" self.input_slots = {slot.name: slot for slot in input_slots} - """A mapping of :class:`InputSlots` - names to their instances.""" + """A mapping of ``InputSlot`` names to their instances.""" self.output_slots = {slot.name: slot for slot in output_slots} - """A mapping of :class:`OutputSlots` - names to their instances.""" + """A mapping of ``OutputSlot`` names to their instances.""" self.schema_steps = [self.name] - """The requested :class:`~easylink.pipeline_schema.PipelineSchema` - :class:`~easylink.step.Step` names this ``NullImplementation`` implements.""" + """The requested :class:`~easylink.step.Step` names this ``NullImplementation`` implements.""" self.combined_name = None """The name of the combined implementation of which ``NullImplementation`` - is a constituent. This is definitionally None for a ``NullImplementation``.""" + is a constituent. This is definitionally None.""" class PartialImplementation: """A representation of one part of a combined implementation that spans multiple :class:`Steps`. - A PartialImplementation is what is initially added to the :class:`~easylink.graph_components.ImplementationGraph` - when a so-called "combined implementation" is used (i.e. an :class:`Implementation` - that spans multiple :class:`Steps`). - We initially add a node for _each_ :class:`~easylink.step.Step`, which has as - its ``implementation`` attribute a PartialImplementation. 
Such a graph is not + A ``PartialImplementation`` is what is initially added to the + :class:`~easylink.graph_components.ImplementationGraph` when a so-called + "combined implementation" is used (i.e. an :class:`Implementation` that spans + multiple ``Steps``). We initially add a node for *each* ``Step``, which has as + its ``implementation`` attribute a ``PartialImplementation``. Such a graph is not yet fit to run. When we make our second pass through, after the flat (non-hierarchical) :class:`~easylink.pipeline_graph.PipelineGraph` has been created, we find the - set of PartialImplementation nodes corresponding to each combined implementation - and replace them with a single node with a true :class:`Implementation` representing + set of ``PartialImplementation`` nodes corresponding to each combined implementation + and replace them with a single node with a true ``Implementation`` representing the combined implementation. Parameters @@ -198,13 +190,12 @@ class PartialImplementation: The name of the combined implementation of which this ``PartialImplementation`` is a part. schema_step - The requested :class:`~easylink.pipeline_schema.PipelineSchema` - :class:`~easylink.step.Step` name that this ``PartialImplementation`` - partially implements. + The requested ``Step`` name that this ``PartialImplementation`` partially + implements. input_slots - The :class:`InputSlots` for this PartialImplementation. + The :class:`InputSlots` for this ``PartialImplementation``. output_slots - The :class:`OutputSlots` for this PartialImplementation. + The :class:`OutputSlots` for this ``PartialImplementation``. 
""" @@ -219,12 +210,9 @@ def __init__( """The name of the combined implementation of which this ``PartialImplementation`` is a part.""" self.schema_step = schema_step - """The requested :class:`~easylink.pipeline_schema.PipelineSchema` - :class:`~easylink.step.Step` name that this ``PartialImplementation`` - partially implements.""" + """The requested ``Step`` name that this ``PartialImplementation`` partially + implements.""" self.input_slots = {slot.name: slot for slot in input_slots} - """A mapping of :class:`InputSlots` - names to their instances.""" + """A mapping of ``InputSlot`` names to their instances.""" self.output_slots = {slot.name: slot for slot in output_slots} - """A mapping of :class:`OutputSlots` - names to their instances.""" + """A mapping of ``OutputSlot`` names to their instances.""" From 391202540a4617b82b8195f831dd93374f7441c0 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Tue, 7 Jan 2025 15:39:36 -0800 Subject: [PATCH 12/18] clean up graph_components.py --- src/easylink/graph_components.py | 125 ++++++++++++++----------------- 1 file changed, 56 insertions(+), 69 deletions(-) diff --git a/src/easylink/graph_components.py b/src/easylink/graph_components.py index 26bbbd8b..e218f08c 100644 --- a/src/easylink/graph_components.py +++ b/src/easylink/graph_components.py @@ -37,13 +37,13 @@ class InputSlot: """ name: str - """The name of the input slot.""" + """The name of the ``InputSlot``.""" env_var: str | None - """The environment variable that this input slot will use to pass a list of - data filepaths to an Implementation.""" + """The environment variable that this ``InputSlot`` will use to pass a list + of data filepaths to an ``Implementation``.""" validator: Callable[[str], None] - """A callable that validates the input data being passed into the pipeline - via this input slot. 
If the data is invalid, the callable should raise an exception + """A callable that validates the input data being passed into the pipeline via + this ``InputSlot``. If the data is invalid, the callable should raise an exception with a descriptive error message which will then be reported to the user.""" @@ -51,21 +51,21 @@ class InputSlot: class OutputSlot: """An abstraction representing a single output slot from a specific node. - In order to pass data between nodes, an OutputSlot of one node can be connected + In order to pass data between nodes, an ``OutputSlot`` of one node can be connected to an :class:`InputSlot` of another node via an :class:`EdgeParams` instance. Notes ----- Nodes can be either :class:`Steps` or :class:`Implementations`. - Input data is validated via the :class:`InputSlot's` required - :attr:`~InputSlot.validator` attribute. In order to prevent multiple - validations of the same files (since outputs of one node can be inputs to another), - no such validator is stored here on the OutputSlot. + Input data is validated via the ``InputSlot`` required :attr:`~InputSlot.validator` + attribute. In order to prevent multiple validations of the same files (since + outputs of one node can be inputs to another), no such validator is stored + here on the ``OutputSlot``. 
""" name: str - """The name of the output slot.""" + """The name of the ``OutputSlot``.""" @dataclass(frozen=True) @@ -81,15 +81,15 @@ class EdgeParams: """ source_node: str - """The name of the source node.""" + """The name of the source node/``Step``.""" target_node: str - """The name of the target node.""" + """The name of the target node/``Step``.""" output_slot: str - """The name of the :class:`OutputSlot` of the source node.""" + """The name of the ``OutputSlot`` of the source node/``Step``.""" input_slot: str - """The name of the :class:`InputSlot` of the target node.""" + """The name of the ``InputSlot`` of the target node/``Step``.""" filepaths: tuple[str] | None = None - """The filepaths that are passed from the source node to the target node.""" + """The filepaths that are passed from the source node/``Step`` to the target node/``Step``.""" @classmethod def from_graph_edge( @@ -98,16 +98,16 @@ def from_graph_edge( sink: str, edge_attrs: dict[str, OutputSlot | InputSlot | str | None], ) -> EdgeParams: - """A convenience method to create an EdgeParams instance. + """A convenience method to create an ``EdgeParams`` instance. Parameters ---------- source - The name of the source node. + The name of the source node/``Step``. sink - The name of the target node. + The name of the target node/``Step``. edge_attrs - The attributes of the edge connecting the source and target nodes. + The attributes of the edge connecting the source and target nodes/``Steps``. 'output_slot' and 'input_slot' are required keys and 'filepaths' is optional. """ return cls( @@ -122,51 +122,45 @@ def from_graph_edge( class StepGraph(nx.MultiDiGraph): """A directed acyclic graph (DAG) of :class:`Steps` and the data dependencies between them. - StepGraphs are DAGs with :class:`Steps` - for nodes and the file dependencies between them for edges. Multiple edges - between nodes are permitted. 
+ ``StepGraphs`` are DAGs with ``Steps`` for nodes and the file dependencies between + them for edges. Multiple edges between nodes are permitted. Notes ----- These are high-level abstractions; they represent a conceptual pipeline - graph with no detail as to how each :class:`~easylink.step.Step` is implemented. - - The highest level StepGraph is the that of the entire :class:`~easylink.pipeline_schema.PipelineSchema`. + graph with no detail as to how each ``Step`` is implemented. - See Also - -------- - :class:`ImplementationGraph` - :class:`~easylink.pipeline_schema.PipelineSchema` + The highest level ``StepGraph`` is that of the entire :class:`~easylink.pipeline_schema.PipelineSchema`. """ @property def step_nodes(self) -> list[str]: - """The topologically sorted list of node/:class:`~easylink.step.Step` names.""" + """The topologically sorted list of node/``Step`` names.""" ordered_nodes = list(nx.topological_sort(self)) return [node for node in ordered_nodes if node != "input_data" and node != "results"] @property def steps(self) -> list[Step]: - """The list of all :class:`Steps` in the graph.""" + """The list of all ``Steps`` in the graph.""" return [self.nodes[node]["step"] for node in self.step_nodes] def add_node_from_step(self, step: Step) -> None: - """Adds a new node to the StepGraph. + """Adds a new node to the ``StepGraph``. Parameters ---------- step - The :class:`~easylink.step.Step` to add to the graph as a new node. + The ``Step`` to add to the graph as a new node. """ self.add_node(step.name, step=step) def add_edge_from_params(self, edge_params: EdgeParams) -> None: - """Adds a new edge to the StepGraph. + """Adds a new edge to the ``StepGraph``. Parameters ---------- edge_params - The :class:`EdgeParams` to add to the graph as a new edge. + The details of the new edge to be added to the graph.
""" return self.add_edge( edge_params.source_node, @@ -181,62 +175,55 @@ def add_edge_from_params(self, edge_params: EdgeParams) -> None: class ImplementationGraph(nx.MultiDiGraph): - """A graph of :class:`Implementations`. + """A directed acyclic graph (DAG) of :class:`Implementations`. - ImplementationGraphs are directed graphs with :class:`Implementations` - for nodes and the file dependencies between them for edges. Self-edges as well - as multiple edges between nodes are permitted. + ``ImplementationGraphs`` are DAGs with ``Implementations`` for nodes and the + file dependencies between them for edges. Self-edges as well as multiple edges + between nodes are permitted. Notes ----- - An ImplementationGraph is a low-level abstraction; it represents the *actual + An ``ImplementationGraph`` is a low-level abstraction; it represents the *actual implementations* of each :class:`~easylink.step.Step` in the pipeline. This is in contrast to a :class:`StepGraph`, which can be an intricate nested structure - due to the various complex and self-similar :class:`~easylink.step.Step` instances - (which represent abstract operations such as "loop this step N times"). An - ImplementationGraph is the flattened and concrete graph of - :class:`Implementations` to run. + due to the various complex and self-similar ``Step`` instances (which represent + abstract operations such as "loop this step N times"). An ``ImplementationGraph`` + is the flattened and concrete graph of ``Implementations`` to run. - The highest level ImplementationGraph is the that of the entire + The highest level ``ImplementationGraph`` is the that of the entire :class:`~easylink.pipeline_graph.PipelineGraph`. 
- - See Also - -------- - :class:`StepGraph` - :class:`~easylink.pipeline_graph.PipelineGraph` """ @property def implementation_nodes(self) -> list[str]: - """The topologically sorted list of node/:class:`~easylink.implementation.Implementation` names.""" + """The topologically sorted list of node/``Implementation`` names.""" ordered_nodes = list(nx.topological_sort(self)) return [node for node in ordered_nodes if node != "input_data" and node != "results"] @property def implementations(self) -> list[Implementation]: - """The list of all :class:`Implementations` in the graph.""" + """The list of all ``Implementations`` in the graph.""" return [self.nodes[node]["implementation"] for node in self.implementation_nodes] def add_node_from_implementation(self, node_name, implementation: Implementation) -> None: - """Adds a new node to the ImplementationGraph. + """Adds a new node to the ``ImplementationGraph``. Parameters ---------- node_name The name of the new node. implementation - The :class:`~easylink.implementation.Implementation` to add to the graph - as a new node. + The ``Implementation`` to add to the graph as a new node. """ self.add_node(node_name, implementation=implementation) def add_edge_from_params(self, edge_params: EdgeParams) -> None: - """Adds a new edge to the ImplementationGraph. + """Adds a new edge to the ``ImplementationGraph``. Parameters ---------- edge_params - The :class:`EdgeParams` to add to the graph as a new edge. + The details of the new edge to be added to the graph. """ return self.add_edge( edge_params.source_node, @@ -253,32 +240,32 @@ def add_edge_from_params(self, edge_params: EdgeParams) -> None: @dataclass(frozen=True) class SlotMapping(ABC): - """A mapping between a slot on a parent Step and a slot on (one of) its child Steps. + """A mapping between a slot on a parent :class:`~easylink.step.Step` and a slot on one of its child ``Steps``. 
- SlotMapping is an interface intended to be used by concrete :class:`InputSlotMapping` + ``SlotMapping`` is an interface intended to be used by concrete :class:`InputSlotMapping` and :class:`OutputSlotMapping` classes. It represents a mapping between - parent and child nodes/:class:`Steps` at different levels - of a potentially-nested :class:`~easylink.pipeline_schema.PipelineSchema`. + parent and child nodes/``Steps`` at different levels of a potentially-nested + :class:`~easylink.pipeline_schema.PipelineSchema`. """ parent_slot: str """The name of the parent slot.""" child_node: str - """The name of the child node.""" + """The name of the child node/``Step``.""" child_slot: str """The name of the child slot.""" @abstractmethod def remap_edge(self, edge: EdgeParams) -> EdgeParams: - """Remaps an edge to connect the parent and child nodes.""" + """Remaps an edge to connect the parent and child nodes/``Steps``.""" pass class InputSlotMapping(SlotMapping): - """A mapping between :class:`InputSlots` of a parent node and a child node.""" + """A mapping between :class:`InputSlots` of a parent node/:class:`~easylink.step.Step` and a child node/``Step``.""" def remap_edge(self, edge: EdgeParams) -> EdgeParams: - """Remaps an edge's :class:`InputSlot`. + """Remaps an edge's ``InputSlot``. Parameters ---------- @@ -288,7 +275,7 @@ def remap_edge(self, edge: EdgeParams) -> EdgeParams: Returns ------- EdgeParams - The remapped edge. + The details of the remapped edge. Raises ------ @@ -306,7 +293,7 @@ def remap_edge(self, edge: EdgeParams) -> EdgeParams: class OutputSlotMapping(SlotMapping): - """A mapping between :class:`InputSlots` of a parent node and a child node.""" + """A mapping between :class:`InputSlots` of a parent node/:class:`~easylink.step.Step` and a child node/``Step``.""" def remap_edge(self, edge: EdgeParams) -> EdgeParams: """Remaps an edge's :class:`OutputSlot`. 
@@ -319,7 +306,7 @@ def remap_edge(self, edge: EdgeParams) -> EdgeParams: Returns ------- EdgeParams - The remapped edge. + The details of the remapped edge. Raises ------ From b657e2258b1a69c9036cff60a1446793a3a94451 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Tue, 7 Jan 2025 16:11:50 -0800 Subject: [PATCH 13/18] swap colors of linked and not-linked code highlights --- docs/source/_static/style.css | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/_static/style.css b/docs/source/_static/style.css index b7ddfd9b..8e68ba5a 100644 --- a/docs/source/_static/style.css +++ b/docs/source/_static/style.css @@ -1,3 +1,12 @@ .wy-nav-content { max-width: 1000px !important; } +/* make links red and bold */ +code.xref { + color: red !important; + font-weight: bold !important; +} +/* make inline code black */ +code { + color: black !important; +} \ No newline at end of file From 8cc6d8283d77686cb367859ba770cfd51799b97c Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Wed, 8 Jan 2025 10:59:08 -0800 Subject: [PATCH 14/18] black --- src/easylink/graph_components.py | 6 +++--- src/easylink/implementation.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/easylink/graph_components.py b/src/easylink/graph_components.py index e218f08c..8aa0c019 100644 --- a/src/easylink/graph_components.py +++ b/src/easylink/graph_components.py @@ -58,9 +58,9 @@ class OutputSlot: ----- Nodes can be either :class:`Steps` or :class:`Implementations`. - Input data is validated via the ``InputSlot`` required :attr:`~InputSlot.validator` - attribute. In order to prevent multiple validations of the same files (since - outputs of one node can be inputs to another), no such validator is stored + Input data is validated via the ``InputSlot`` required :attr:`~InputSlot.validator` + attribute. 
In order to prevent multiple validations of the same files (since + outputs of one node can be inputs to another), no such validator is stored here on the ``OutputSlot``. """ diff --git a/src/easylink/implementation.py b/src/easylink/implementation.py index 69898514..a50ad653 100644 --- a/src/easylink/implementation.py +++ b/src/easylink/implementation.py @@ -190,7 +190,7 @@ class PartialImplementation: The name of the combined implementation of which this ``PartialImplementation`` is a part. schema_step - The requested ``Step`` name that this ``PartialImplementation`` partially + The requested ``Step`` name that this ``PartialImplementation`` partially implements. input_slots The :class:`InputSlots` for this ``PartialImplementation``. From 6332f60784fb1679a899dcf104bdb4e6dd1a9ccc Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Wed, 8 Jan 2025 12:34:24 -0800 Subject: [PATCH 15/18] fixes for bad merge conflict resolutions --- src/easylink/configuration.py | 24 +++++----- src/easylink/graph_components.py | 76 ++++++++++++++++++-------------- src/easylink/implementation.py | 38 ++++++++-------- tests/unit/test_config.py | 2 +- 4 files changed, 75 insertions(+), 65 deletions(-) diff --git a/src/easylink/configuration.py b/src/easylink/configuration.py index c95e14ad..d52d742d 100644 --- a/src/easylink/configuration.py +++ b/src/easylink/configuration.py @@ -174,7 +174,7 @@ def spark_resources(self) -> dict[str, Any]: ################# def _get_schema(self, potential_schemas: list[PipelineSchema]) -> PipelineSchema: - """Returns the first ``PipelineSchema`` that successfully validates the requested pipeline. + """Returns the first :class:`~easylink.pipeline_schema.PipelineSchema` that validates the requested pipeline. Parameters ---------- @@ -183,17 +183,17 @@ def _get_schema(self, potential_schemas: list[PipelineSchema]) -> PipelineSchema Returns ------- - The first pipeline schema that successfully validates the requested pipeline. 
- If no validated pipeline schema is found, `exit()` is called with `errno.EINVAL` - and any validation errors are logged. + The first ``PipelineSchema`` that validates the requested pipeline. + If no validated ``PipelineSchema`` is found, `exit()` is called with + `errno.EINVAL` and any validation errors are logged. Notes ----- This acts as the pipeline configuration file's validation method since we can only find a matching ``PipelineSchema`` if that file is valid. - This method returns the first ``PipelineSchema`` that successfully validates - and does not attempt to validate additional ones. + This method returns the *first* ``PipelineSchema`` that validates and does + not attempt to check additional ones. """ errors = defaultdict(dict) # Try each schema until one is validated @@ -283,11 +283,11 @@ def load_params_from_specification( Parameters ---------- pipeline_specification - The path to the pipeline specification yaml file. + The path to the pipeline specification file. input_data - The path to the input data yaml file. + The path to the input data file. computing_environment - The path to the computing environment yaml file. + The path to the computing environment file. results_dir The path to the results directory. 
@@ -306,7 +306,7 @@ def load_params_from_specification( def _load_input_data_paths( input_data_specification_path: str | Path, ) -> dict[str, list[Path]]: - """Creates a dictionary of input data paths from the input data yaml file.""" + """Creates a dictionary of input data paths from the input data specification file.""" input_data_paths = load_yaml(input_data_specification_path) if not isinstance(input_data_paths, dict): raise TypeError( @@ -322,13 +322,13 @@ def _load_input_data_paths( def _load_computing_environment( computing_environment_specification_path: str | None, ) -> dict[Any, Any]: - """Loads the computing environment yaml file and returns the contents as a dict.""" + """Loads the computing environment specification file and returns the contents as a dict.""" if not computing_environment_specification_path: return {} # handles empty environment.yaml elif not Path(computing_environment_specification_path).is_file(): raise FileNotFoundError( "Computing environment is expected to be a path to an existing" - f" yaml file. Input was: '{computing_environment_specification_path}'" + f" specification file. Input was: '{computing_environment_specification_path}'" ) else: return load_yaml(computing_environment_specification_path) diff --git a/src/easylink/graph_components.py b/src/easylink/graph_components.py index 8aa0c019..7093856c 100644 --- a/src/easylink/graph_components.py +++ b/src/easylink/graph_components.py @@ -24,7 +24,7 @@ @dataclass(frozen=True) class InputSlot: - """An abstraction representing a single input slot to a specific node. + """A single input slot to a specific node. ``InputSlots`` represent distinct semantic categories of input files, between which a node must be able to differentiate. 
In order to pass data between nodes, @@ -39,20 +39,24 @@ class InputSlot: name: str """The name of the ``InputSlot``.""" env_var: str | None - """The environment variable that this ``InputSlot`` will use to pass a list - of data filepaths to an ``Implementation``.""" + """The environment variable that is used to pass a list of data filepaths to + an ``Implementation``.""" validator: Callable[[str], None] - """A callable that validates the input data being passed into the pipeline via - this ``InputSlot``. If the data is invalid, the callable should raise an exception - with a descriptive error message which will then be reported to the user.""" + """A function that validates the input data being passed into the pipeline via + this ``InputSlot``. If the data is invalid, the function should raise an exception + with a descriptive error message which will then be reported to the user. + **Note that the function *must* be defined in the :mod:`easylink.utilities.validation_utils` + module!**""" @dataclass(frozen=True) class OutputSlot: - """An abstraction representing a single output slot from a specific node. + """A single output slot from a specific node. - In order to pass data between nodes, an ``OutputSlot`` of one node can be connected - to an :class:`InputSlot` of another node via an :class:`EdgeParams` instance. + ``OutputSlots`` represent distinct semantic categories of output files, between + which a node must be able to differentiate. In order to pass data between nodes, + an ``OutputSlot`` of one node can be connected to an :class:`InputSlot` of another + node via an :class:`EdgeParams` instance. Notes ----- @@ -70,9 +74,9 @@ class OutputSlot: @dataclass(frozen=True) class EdgeParams: - """A representation of an edge between two nodes in a graph. + """The details of an edge between two nodes in a graph. 
- EdgeParams connect the :class:`OutputSlot` of a source node to the :class:`InputSlot` + ``EdgeParams`` connect the :class:`OutputSlot` of a source node to the :class:`InputSlot` of a target node. Notes @@ -81,15 +85,15 @@ class EdgeParams: """ source_node: str - """The name of the source node/``Step``.""" + """The name of the source node.""" target_node: str - """The name of the target node/``Step``.""" + """The name of the target node.""" output_slot: str - """The name of the ``OutputSlot`` of the source node/``Step``.""" + """The name of the source node's ``OutputSlot``.""" input_slot: str - """The name of the ``InputSlot`` of the target node/``Step``.""" + """The name of the target node's ``InputSlot``.""" filepaths: tuple[str] | None = None - """The filepaths that are passed from the source node/``Step`` to the target node/``Step``.""" + """The filepaths that are passed from the source node to the target node.""" @classmethod def from_graph_edge( @@ -103,12 +107,13 @@ def from_graph_edge( Parameters ---------- source - The name of the source node/``Step``. + The name of the source node. sink - The name of the target node/``Step``. + The name of the target node. edge_attrs - The attributes of the edge connecting the source and target nodes/``Steps``. - 'output_slot' and 'input_slot' are required keys and 'filepaths' is optional. + The attributes of the edge connecting the source and target nodes. + 'output_slot' and 'input_slot' are required keys while 'filepaths' is + optional. """ return cls( source, @@ -120,10 +125,11 @@ def from_graph_edge( class StepGraph(nx.MultiDiGraph): - """A directed acyclic graph (DAG) of :class:`Steps` and the data dependencies between them. + """A directed acyclic graph (DAG) of :class:`Steps`. - ``StepGraphs`` are DAGs with ``Steps`` for nodes and the file dependencies between - them for edges. Multiple edges between nodes are permitted. 
+ ``StepGraphs`` are DAGs with ``Step`` names for nodes and their corresponding + ``Step`` instances as attributes on those nodes. The file dependencies between + nodes are the graph edges; multiple edges between nodes are permitted. Notes ----- @@ -135,13 +141,13 @@ class StepGraph(nx.MultiDiGraph): @property def step_nodes(self) -> list[str]: - """The topologically sorted list of node/``Step`` names.""" + """The topologically sorted list of ``Step`` names.""" ordered_nodes = list(nx.topological_sort(self)) return [node for node in ordered_nodes if node != "input_data" and node != "results"] @property def steps(self) -> list[Step]: - """The list of all ``Steps`` in the graph.""" + """The topologically sorted list of all ``Steps`` in the graph.""" return [self.nodes[node]["step"] for node in self.step_nodes] def add_node_from_step(self, step: Step) -> None: @@ -196,13 +202,13 @@ class ImplementationGraph(nx.MultiDiGraph): @property def implementation_nodes(self) -> list[str]: - """The topologically sorted list of node/``Implementation`` names.""" + """The topologically sorted list of ``Implementation`` names.""" ordered_nodes = list(nx.topological_sort(self)) return [node for node in ordered_nodes if node != "input_data" and node != "results"] @property def implementations(self) -> list[Implementation]: - """The list of all ``Implementations`` in the graph.""" + """The topologically sorted list of all ``Implementations`` in the graph.""" return [self.nodes[node]["implementation"] for node in self.implementation_nodes] def add_node_from_implementation(self, node_name, implementation: Implementation) -> None: @@ -240,29 +246,33 @@ def add_edge_from_params(self, edge_params: EdgeParams) -> None: @dataclass(frozen=True) class SlotMapping(ABC): - """A mapping between a slot on a parent :class:`~easylink.step.Step` and a slot on one of its child ``Steps``. + """A mapping between a slot on a parent node and a slot on one of its child nodes. 
``SlotMapping`` is an interface intended to be used by concrete :class:`InputSlotMapping` and :class:`OutputSlotMapping` classes. It represents a mapping between - parent and child nodes/``Steps`` at different levels of a potentially-nested + parent and child nodes at different levels of a potentially-nested :class:`~easylink.pipeline_schema.PipelineSchema`. + + Notes + ----- + Nodes can be either :class:`Steps` or :class:`Implementations`. """ parent_slot: str """The name of the parent slot.""" child_node: str - """The name of the child node/``Step``.""" + """The name of the child node.""" child_slot: str """The name of the child slot.""" @abstractmethod def remap_edge(self, edge: EdgeParams) -> EdgeParams: - """Remaps an edge to connect the parent and child nodes/``Steps``.""" + """Remaps an edge to connect the parent and child nodes.""" pass class InputSlotMapping(SlotMapping): - """A mapping between :class:`InputSlots` of a parent node/:class:`~easylink.step.Step` and a child node/``Step``.""" + """A mapping between :class:`InputSlots` of a parent node and a child node.""" def remap_edge(self, edge: EdgeParams) -> EdgeParams: """Remaps an edge's ``InputSlot``. @@ -293,7 +303,7 @@ def remap_edge(self, edge: EdgeParams) -> EdgeParams: class OutputSlotMapping(SlotMapping): - """A mapping between :class:`InputSlots` of a parent node/:class:`~easylink.step.Step` and a child node/``Step``.""" + """A mapping between :class:`OutputSlots` of a parent node and a child node.""" def remap_edge(self, edge: EdgeParams) -> EdgeParams: """Remaps an edge's :class:`OutputSlot`. diff --git a/src/easylink/implementation.py b/src/easylink/implementation.py index a50ad653..d05e8b5d 100644 --- a/src/easylink/implementation.py +++ b/src/easylink/implementation.py @@ -22,21 +22,21 @@ class Implementation: """A representation of an actual container that will be executed for a :class:`~easylink.step.Step`. - ``Implementations`` exist at a lower level than Steps. 
This class contains + ``Implementations`` exist at a lower level than ``Steps``. This class contains information about what container to use, what environment variables to set inside the container, and some metadata about the container. Parameters ---------- schema_steps - The requested Step names for which this ``Implementation`` is expected to - be responsible. + The user-requested ``Step`` names for which this ``Implementation`` is + expected to implement. implementation_config - The configuration for this ``Implementation``. + The configuration details required to run the relevant container. input_slots - The :class:`InputSlots` for this ``Implementation``. + All required :class:`InputSlots`. output_slots - The :class:`OutputSlots` for this ``Implementation``. + All required :class:`OutputSlots`. """ def __init__( @@ -57,10 +57,10 @@ def __init__( """A mapping of environment variables to set.""" self.metadata_steps = self._metadata["steps"] """The names of the specific ``Steps`` for which this ``Implementation`` - is responsible to implement.""" + has been designed to implement.""" self.schema_steps = schema_steps - """The *user-requested* ``Step`` names for which this ``Implementation`` - is responsible to implement.""" + """The names of the specific ``Steps`` that the user has requested to be + implemented by this particular ``Implementation``.""" self.requires_spark = self._metadata.get("requires_spark", False) """Whether this ``Implementation`` requires a Spark environment.""" @@ -90,7 +90,7 @@ def validate(self) -> list[str]: ################## def _load_metadata(self) -> dict[str, str]: - """Loads the metadata for this ``Implementation`` instance.""" + """Loads the relevant implementation metadata.""" metadata = load_yaml(paths.IMPLEMENTATION_METADATA) return metadata[self.name] @@ -104,36 +104,36 @@ def _validate_expected_steps(self, logs: list[str]) -> list[str]: return logs def _validate_container_exists(self, logs: list[str]) -> list[str]: - 
"""Validates that the container for this ``Implementation`` exists.""" + """Validates that the container to run exists.""" err_str = f"Container '{self.singularity_image_path}' does not exist." if not Path(self.singularity_image_path).exists(): logs.append(err_str) return logs def _get_env_vars(self, implementation_config: LayeredConfigTree) -> dict[str, str]: - """Gets the environment variables relevant to this ``Implementation``.""" + """Gets the relevant environment variables.""" env_vars = self._metadata.get("env", {}) env_vars.update(implementation_config.get("configuration", {})) return env_vars @property def singularity_image_path(self) -> str: - """The path to the Singularity image for this ``Implementation``.""" + """The path to the required Singularity image.""" return self._metadata["image_path"] @property def script_cmd(self) -> str: - """The command to run inside of the container for this ``Implementation``.""" + """The command to run inside of the container.""" return self._metadata["script_cmd"] @property def outputs(self) -> dict[str, list[str]]: - """The outputs expected from this ``Implementation``.""" + """The expected output metadata.""" return self._metadata["outputs"] class NullImplementation: - """A partial :class:`Implementation` interface that represents that no container needs to run. + """A partial :class:`Implementation` interface when no container is needed to run. The primary use case for this class is when adding an :class:`~easylink.step.IOStep` - which does not have a corresponding @@ -146,9 +146,9 @@ class NullImplementation: name The name of this ``NullImplementation``. input_slots - The ``InputSlots`` for this ``NullImplementation``. + All required ``InputSlots``. output_slots - The ``OutputSlots`` for this ``NullImplementation``. + All required ``OutputSlots``. 
""" def __init__( @@ -171,7 +171,7 @@ def __init__( class PartialImplementation: - """A representation of one part of a combined implementation that spans multiple :class:`Steps`. + """One part of a combined implementation that spans multiple :class:`Steps`. A ``PartialImplementation`` is what is initially added to the :class:`~easylink.graph_components.ImplementationGraph` when a so-called diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index d90eb2aa..2e01d55d 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -78,7 +78,7 @@ def test__load_computing_environment(test_dir, environment_file, expected): def test_load_missing_computing_environment_fails(): with pytest.raises( FileNotFoundError, - match="Computing environment is expected to be a path to an existing yaml file. .*", + match="Computing environment is expected to be a path to an existing specification file. .*", ): _load_computing_environment(Path("some/bogus/path.yaml")) From 4b15bf486b358dbeaf937117f473e160dab56d28 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Mon, 13 Jan 2025 09:34:00 -0800 Subject: [PATCH 16/18] minor tweak --- src/easylink/configuration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/easylink/configuration.py b/src/easylink/configuration.py index d52d742d..5702259c 100644 --- a/src/easylink/configuration.py +++ b/src/easylink/configuration.py @@ -52,8 +52,8 @@ class Config(LayeredConfigTree): """A container for configuration information. - A ``Config`` is a container that includes the combination of the user-provided - pipeline, input data, and computing environment specifications. It is a nested + The ``Config`` contains the user-provided specifications for the pipeline, + input data, and computing environment specifications. It is a nested dictionary-like object that supports prioritized layers of configuration settings as well as dot-notation access to its attributes. 
From 4e7604179626774f177d2013ca1c5e8f3c54da20 Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Mon, 13 Jan 2025 09:38:46 -0800 Subject: [PATCH 17/18] add sphinx configuration to readthedocs.yml --- .readthedocs.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 3035caa9..43cb1928 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,6 +1,9 @@ # This is the version for the readthedocs configuration. Version 2 ignores # web-based configuration and uses everything from this file. version: 2 +sphinx: + configuration: docs/source/conf.py + fail_on_warning: true # Configure the python version and environment construction run before # docs are built. @@ -16,6 +19,3 @@ python: extra_requirements: - docs -# Doc builds will fail if there are any warnings -sphinx: - fail_on_warning: true From b6deda6af8dcc8f7e98c73d43bfbb25e6895a17d Mon Sep 17 00:00:00 2001 From: Steve Bachmeier Date: Mon, 13 Jan 2025 09:40:09 -0800 Subject: [PATCH 18/18] black --- src/easylink/configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/easylink/configuration.py b/src/easylink/configuration.py index 5702259c..01fd547d 100644 --- a/src/easylink/configuration.py +++ b/src/easylink/configuration.py @@ -52,7 +52,7 @@ class Config(LayeredConfigTree): """A container for configuration information. - The ``Config`` contains the user-provided specifications for the pipeline, + The ``Config`` contains the user-provided specifications for the pipeline, input data, and computing environment specifications. It is a nested dictionary-like object that supports prioritized layers of configuration settings as well as dot-notation access to its attributes.