Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for saving and loading simulation state to / from files #1227

Merged
merged 42 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
0269ea1
Factor out parts of simulate method
matt-graham Nov 14, 2023
a0a848f
Further refactoring of Simulation
matt-graham Dec 8, 2023
7ea292f
Add methods for saving and loading simulations
matt-graham Dec 8, 2023
3fd5cd3
Add initial test for simulation saving and loading
matt-graham Dec 8, 2023
7e5f666
Factor out and add additional simulation test checks
matt-graham Dec 11, 2023
c2a15c3
Explicitly set logger output file when loading from pickle
matt-graham Dec 11, 2023
921ab2e
Check next date in event queue before popping
matt-graham Dec 11, 2023
7596fc6
Make pytest seed parameter session scoped
matt-graham Dec 11, 2023
369ea88
Don't use next on counter in test check
matt-graham Dec 11, 2023
6c1afd8
Refactor global constants to fixtures in simulation tests + additiona…
matt-graham Dec 11, 2023
e8bd4d8
Move logging configuration out of load_from_pickle
matt-graham Dec 11, 2023
775cac1
Add test for exception when simulation past end date
matt-graham Dec 11, 2023
d3ec718
Add docstrings for new methods
matt-graham Dec 11, 2023
cc71c01
Add errors when running without initialising or initialising multiple…
matt-graham Dec 11, 2023
a5d7289
Add dill to dependencies
matt-graham Dec 11, 2023
2bb4066
Sort imports
matt-graham Dec 11, 2023
fc60e46
Merge branch 'master' into mmg/refactor-simulate
matt-graham Dec 11, 2023
97af3b0
Fix fenceposting error in simulation end date
matt-graham Dec 12, 2023
c81a0f3
Merge branch 'master' into mmg/refactor-simulate
matt-graham Dec 12, 2023
1d84be6
Merge branch 'master' into mmg/refactor-simulate
matt-graham Mar 20, 2024
cd1a310
Merge branch 'master' into mmg/refactor-simulate
matt-graham Apr 8, 2024
e520cca
Merge branch 'master' into mmg/refactor-simulate
matt-graham Jun 17, 2024
05c61dd
Merge branch 'master' into mmg/refactor-simulate
matt-graham Jul 24, 2024
d53da61
Fix explicit comparison to type
matt-graham Jul 24, 2024
40d3eaa
Add option to configure logging when loading from pickle
matt-graham Jul 25, 2024
5205a27
Move check for open log file in close_output_file method
matt-graham Jul 25, 2024
e795670
Tidy up docstrings and type hints
matt-graham Jul 25, 2024
1b1c179
Remove use of configure_logging in test
matt-graham Jul 25, 2024
1604bc2
Update scenario to allow suspending and resuming
matt-graham Jul 31, 2024
dc20983
Add utility function to merge log files
matt-graham Jul 31, 2024
cbadce6
Add test to check equality of parsed log files in suspend-resume
matt-graham Jul 31, 2024
9c139e9
Fix import sort order
matt-graham Jul 31, 2024
1d91b95
Merge branch 'master' into mmg/refactor-simulate
matt-graham Sep 9, 2024
19a2603
Merge branch 'master' into mmg/refactor-simulate
matt-graham Sep 24, 2024
ec10b40
Update pinned dill version to 0.3.8
matt-graham Sep 26, 2024
8d9000a
Adding log message when loading suspended simulation
matt-graham Sep 26, 2024
39f5ce4
Adding log message when saving suspended simulation
matt-graham Sep 26, 2024
60f011c
Increase simulation pop size and duration in test
matt-graham Sep 26, 2024
6f5a76d
Avoid reading in log files to be merged all at once
matt-graham Sep 26, 2024
4eb2ebe
Add tests for merge_log_files function
matt-graham Sep 26, 2024
87e5fa9
Fix import order sorting
matt-graham Sep 26, 2024
f0bb572
Fix import order sorting (second attempt)
matt-graham Sep 26, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@
'exclude-members': '__dict__, name, rng, sim' # , read_parameters',
}

# Include both class level and __init__ docstring content in class documentation
autoclass_content = 'both'

# The checker can't see private repos
linkcheck_ignore = ['^https://github.com/UCL/TLOmodel.*',
'https://www.who.int/bulletin/volumes/88/8/09-068213/en/nn']
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ dependencies = [
"azure-identity",
"azure-keyvault",
"azure-storage-file-share",
# For saving and loading simulation state
"dill",
]
description = "Thanzi la Onse Epidemiology Model"
dynamic = ["version"]
Expand Down
3 changes: 3 additions & 0 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ cryptography==41.0.3
# pyjwt
cycler==0.11.0
# via matplotlib
dill==0.3.8
# via tlo (pyproject.toml)
et-xmlfile==1.1.0
# via openpyxl
fonttools==4.42.1
Expand Down Expand Up @@ -112,6 +114,7 @@ pyjwt[crypto]==2.8.0
# via
# adal
# msal
# pyjwt
pyparsing==3.1.1
# via matplotlib
pyshp==2.3.1
Expand Down
35 changes: 9 additions & 26 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# This file is autogenerated by pip-compile with Python 3.8
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --extra=dev --output-file=requirements/dev.txt
Expand Down Expand Up @@ -61,7 +61,9 @@ colorama==0.4.6
contourpy==1.1.1
# via matplotlib
coverage[toml]==7.3.1
# via pytest-cov
# via
# coverage
# pytest-cov
cryptography==41.0.3
# via
# adal
Expand All @@ -72,14 +74,14 @@ cryptography==41.0.3
# pyjwt
cycler==0.11.0
# via matplotlib
dill==0.3.7
# via pylint
dill==0.3.8
# via
# pylint
# tlo (pyproject.toml)
distlib==0.3.7
# via virtualenv
et-xmlfile==1.1.0
# via openpyxl
exceptiongroup==1.1.3
# via pytest
execnet==2.0.2
# via pytest-xdist
filelock==3.12.4
Expand All @@ -94,10 +96,6 @@ gitpython==3.1.36
# via tlo (pyproject.toml)
idna==3.4
# via requests
importlib-metadata==6.8.0
# via build
importlib-resources==6.1.1
# via matplotlib
iniconfig==2.0.0
# via pytest
isodate==0.6.1
Expand Down Expand Up @@ -172,6 +170,7 @@ pyjwt[crypto]==2.8.0
# via
# adal
# msal
# pyjwt
pylint==3.0.1
# via tlo (pyproject.toml)
pyparsing==3.1.1
Expand Down Expand Up @@ -221,29 +220,17 @@ smmap==5.0.1
# via gitdb
squarify==0.4.3
# via tlo (pyproject.toml)
tomli==2.0.1
# via
# build
# coverage
# pip-tools
# pylint
# pyproject-api
# pyproject-hooks
# pytest
# tox
tomlkit==0.12.1
# via pylint
tox==4.11.3
# via tlo (pyproject.toml)
typing-extensions==4.8.0
# via
# astroid
# azure-core
# azure-keyvault-certificates
# azure-keyvault-keys
# azure-keyvault-secrets
# azure-storage-file-share
# pylint
tzdata==2023.3
# via pandas
urllib3==2.0.4
Expand All @@ -254,10 +241,6 @@ virtualenv==20.24.5
# tox
wheel==0.41.2
# via pip-tools
zipp==3.17.0
# via
# importlib-metadata
# importlib-resources

# The following packages are considered to be unsafe in a requirements file:
# pip
Expand Down
37 changes: 36 additions & 1 deletion src/tlo/analysis/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
General utility functions for TLO analysis
"""
import fileinput
import gzip
import json
import os
Expand Down Expand Up @@ -86,6 +87,40 @@ def parse_log_file(log_filepath, level: int = logging.INFO):
return LogsDict({name: handle.name for name, handle in module_name_to_filehandle.items()}, level)


def merge_log_files(log_path_1: Path, log_path_2: Path, output_path: Path) -> None:
    """Merge two log files, skipping any repeated header lines.

    :param log_path_1: Path to first log file to merge. Records from this log file will
        appear first in merged log file.
    :param log_path_2: Path to second log file to merge. Records from this log file will
        appear after those in log file at `log_path_1` and any header lines in this file
        which are also present in log file at `log_path_1` will be skipped.
    :param output_path: Path to write merged log file to. Must not refer to the same
        file as `log_path_1` or `log_path_2` as data is read from those files while
        writing to this path.
    :raises ValueError: If `output_path` refers to the same file as `log_path_1` or
        `log_path_2`.
    :raises RuntimeError: If two header lines with the same UUID but differing content
        are encountered while merging.
    """
    # Compare resolved paths so that different spellings of the same file (for example
    # a relative versus an absolute path) are also rejected - writing to a file while
    # it is being read from would corrupt it.
    if output_path.resolve() in (log_path_1.resolve(), log_path_2.resolve()):
        msg = "output_path must not be equal to log_path_1 or log_path_2"
        raise ValueError(msg)
    with fileinput.input(files=(log_path_1, log_path_2), mode="r") as log_lines:
        with output_path.open("w") as output_file:
            # Maps header UUID -> exact header line already written; used both to skip
            # duplicate headers and to detect conflicting headers sharing a UUID.
            written_header_lines = {}
            for log_line in log_lines:
                # Every log line is expected to be a single JSON record.
                log_data = json.loads(log_line)
                if log_data.get("type") == "header":
                    previous_header_line = written_header_lines.get(log_data["uuid"])
                    if previous_header_line is not None:
                        if previous_header_line == log_line:
                            # Identical header already written - skip the repeat.
                            continue
                        msg = (
                            "Inconsistent header lines with matching UUIDs found when merging logs:\n"
                            f"{previous_header_line}\n{log_line}\n"
                        )
                        raise RuntimeError(msg)
                    written_header_lines[log_data["uuid"]] = log_line
                output_file.write(log_line)


def write_log_to_excel(filename, log_dataframes):
"""Takes the output of parse_log_file() and creates an Excel file from dataframes"""
metadata = list()
Expand Down Expand Up @@ -1131,7 +1166,7 @@ def get_parameters_for_status_quo() -> Dict:
"equip_availability": "all", # <--- NB. Existing calibration is assuming all equipment is available
},
}

def get_parameters_for_standard_mode2_runs() -> Dict:
"""
Returns a dictionary of parameters and their updated values to indicate
Expand Down
73 changes: 61 additions & 12 deletions src/tlo/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def draw_parameters(self, draw_number, rng):

from tlo import Date, Simulation, logging
from tlo.analysis.utils import parse_log_file
from tlo.util import str_to_pandas_date

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
Expand Down Expand Up @@ -141,6 +142,16 @@ def parse_arguments(self, extra_arguments: List[str]) -> None:
self.arguments = extra_arguments

parser = argparse.ArgumentParser()
parser.add_argument(
"--resume-simulation",
type=str,
help="Directory containing suspended state files to resume simulation from",
)
parser.add_argument(
"--suspend-date",
type=str_to_pandas_date,
help="Date to suspend the simulation at",
)

# add arguments from the subclass
self.add_arguments(parser)
Expand Down Expand Up @@ -382,20 +393,58 @@ def run_sample_by_number(self, output_directory, draw_number, sample_number):
sample = self.get_sample(draw, sample_number)
log_config = self.scenario.get_log_config(output_directory)

logger.info(key="message", data=f"Running draw {sample['draw_number']}, sample {sample['sample_number']}")

sim = Simulation(
start_date=self.scenario.start_date,
seed=sample["simulation_seed"],
log_config=log_config
logger.info(
key="message",
data=f"Running draw {sample['draw_number']}, sample {sample['sample_number']}",
)
sim.register(*self.scenario.modules())

if sample["parameters"] is not None:
self.override_parameters(sim, sample["parameters"])

sim.make_initial_population(n=self.scenario.pop_size)
sim.simulate(end_date=self.scenario.end_date)
# if user has specified a restore simulation, we load it from a pickle file
if (
hasattr(self.scenario, "resume_simulation")
and self.scenario.resume_simulation is not None
):
suspended_simulation_path = (
Path(self.scenario.resume_simulation)
/ str(draw_number)
/ str(sample_number)
/ "suspended_simulation.pickle"
)
logger.info(
key="message",
data=f"Loading pickled suspended simulation from {suspended_simulation_path}",
)
sim = Simulation.load_from_pickle(pickle_path=suspended_simulation_path, log_config=log_config)
else:
sim = Simulation(
start_date=self.scenario.start_date,
seed=sample["simulation_seed"],
log_config=log_config,
)
sim.register(*self.scenario.modules())

if sample["parameters"] is not None:
self.override_parameters(sim, sample["parameters"])

sim.make_initial_population(n=self.scenario.pop_size)
sim.initialise(end_date=self.scenario.end_date)

# if user has specified a suspend date, we run the simulation to that date and
# save it to a pickle file
if (
hasattr(self.scenario, "suspend_date")
and self.scenario.suspend_date is not None
):
sim.run_simulation_to(to_date=self.scenario.suspend_date)
matt-graham marked this conversation as resolved.
Show resolved Hide resolved
suspended_simulation_path = Path(log_config["directory"]) / "suspended_simulation.pickle"
sim.save_to_pickle(pickle_path=suspended_simulation_path)
sim.close_output_file()
logger.info(
key="message",
data=f"Simulation suspended at {self.scenario.suspend_date} and saved to {suspended_simulation_path}",
)
else:
sim.run_simulation_to(to_date=self.scenario.end_date)
sim.finalise()

if sim.log_filepath is not None:
outputs = parse_log_file(sim.log_filepath)
Expand Down
Loading