diff --git a/src/ctapipe/core/provenance.py b/src/ctapipe/core/provenance.py index b2018f06038..a8f602e127f 100644 --- a/src/ctapipe/core/provenance.py +++ b/src/ctapipe/core/provenance.py @@ -96,7 +96,7 @@ def _get_current_or_start_activity(self): return self.current_activity - def add_input_file(self, filename, role=None): + def add_input_file(self, filename, role=None, add_meta=True): """register an input to the current activity Parameters @@ -107,14 +107,14 @@ def add_input_file(self, filename, role=None): role this input file satisfies (optional) """ activity = self._get_current_or_start_activity() - activity.register_input(abspath(filename), role=role) + activity.register_input(abspath(filename), role=role, add_meta=add_meta) log.debug( "added input entity '%s' to activity: '%s'", filename, activity.name, ) - def add_output_file(self, filename, role=None): + def add_output_file(self, filename, role=None, add_meta=True): """ register an output to the current activity @@ -127,7 +127,7 @@ def add_output_file(self, filename, role=None): """ activity = self._get_current_or_start_activity() - activity.register_output(abspath(filename), role=role) + activity.register_output(abspath(filename), role=role, add_meta=add_meta) log.debug( "added output entity '%s' to activity: '%s'", filename, @@ -244,7 +244,7 @@ def start(self): self._prov["start"].update(_sample_cpu_and_memory()) self._prov["system"].update(_get_system_provenance()) - def register_input(self, url, role=None): + def register_input(self, url, role=None, add_meta=True): """ Add a URL of a file to the list of inputs (can be a filename or full url, if no URL specifier is given, assume 'file://') @@ -255,13 +255,16 @@ def register_input(self, url, role=None): filename or url of input file role: str role name that this input satisfies + add_meta: bool + If true, try to load reference metadata from input file + and add to provenance. 
""" - reference_meta = self._get_reference_meta(url=url) + reference_meta = self._get_reference_meta(url=url) if add_meta else None self._prov["input"].append( dict(url=url, role=role, reference_meta=reference_meta) ) - def register_output(self, url, role=None): + def register_output(self, url, role=None, add_meta=True): """ Add a URL of a file to the list of outputs (can be a filename or full url, if no URL specifier is given, assume 'file://') @@ -275,8 +278,11 @@ def register_output(self, url, role=None): filename or url of output file role: str role name that this output satisfies + add_meta: bool + If true, try to load reference metadata from output file + and add to provenance. """ - reference_meta = self._get_reference_meta(url=url) + reference_meta = self._get_reference_meta(url=url) if add_meta else None self._prov["output"].append( dict(url=url, role=role, reference_meta=reference_meta) ) diff --git a/src/ctapipe/core/tool.py b/src/ctapipe/core/tool.py index e1f7c2e2746..3579ba3c1bd 100644 --- a/src/ctapipe/core/tool.py +++ b/src/ctapipe/core/tool.py @@ -283,7 +283,7 @@ def load_config_file(self, path: str | pathlib.Path) -> None: # fall back to traitlets.config.Application's implementation super().load_config_file(str(path)) - Provenance().add_input_file(path, role="Tool Configuration") + Provenance().add_input_file(path, role="Tool Configuration", add_meta=False) def update_logging_config(self): """Update the configuration of loggers.""" diff --git a/src/ctapipe/io/simteleventsource.py b/src/ctapipe/io/simteleventsource.py index e4ff831dacb..cf89b11b461 100644 --- a/src/ctapipe/io/simteleventsource.py +++ b/src/ctapipe/io/simteleventsource.py @@ -345,8 +345,12 @@ def read_atmosphere_profile_from_simtel( if isinstance(simtelfile, str | Path): context_manager = SimTelFile(simtelfile) + # FIXME: simtel files currently do not have CTAO reference + # metadata, should be set to True once we store metadata Provenance().add_input_file( - filename=simtelfile, 
role="ctapipe.atmosphere.AtmosphereDensityProfile" + filename=simtelfile, + role="ctapipe.atmosphere.AtmosphereDensityProfile", + add_meta=False, ) else: diff --git a/src/ctapipe/reco/reconstructor.py b/src/ctapipe/reco/reconstructor.py index 24b708e9d4a..d09c7d4b362 100644 --- a/src/ctapipe/reco/reconstructor.py +++ b/src/ctapipe/reco/reconstructor.py @@ -150,7 +150,8 @@ def read(cls, path, parent=None, subarray=None, **kwargs): for attr, value in kwargs.items(): setattr(instance, attr, value) - Provenance().add_input_file(path, role="reconstructor") + # FIXME: we currently don't store metadata in the joblib / pickle files, see #2603 + Provenance().add_input_file(path, role="reconstructor", add_meta=False) return instance diff --git a/src/ctapipe/reco/sklearn.py b/src/ctapipe/reco/sklearn.py index 6b6c9cc133f..19655dc1a0c 100644 --- a/src/ctapipe/reco/sklearn.py +++ b/src/ctapipe/reco/sklearn.py @@ -670,7 +670,8 @@ def read(cls, path, **kwargs): f"{path} did not contain an instance of {cls}, got {instance}" ) - Provenance().add_input_file(path, role="ml-models") + # FIXME: we currently don't store metadata in the joblib / pickle files, see #2603 + Provenance().add_input_file(path, role="ml-models", add_meta=False) return instance @lazyproperty