European-XFEL · RobertRosca · Dec 10, 2023 · Dec 10, 2023 · Dec 10, 2023 · Dec 10, 2023
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -30,6 +30,9 @@ jobs:
       with:
         packages: ${{ env.SYSTEM_PACKAGES }}
 
+    - name: Install bubblewrap
+      run: sudo apt update -y && sudo apt install bubblewrap -y
+
     - uses: actions/cache@v3
       with:
         path: ~/.cache/pip

diff --git a/damnit/backend/extract_data.py b/damnit/backend/extract_data.py
@@ -1,4 +1,5 @@
 import argparse
+import contextlib
 import getpass
 import os
 import logging
@@ -27,6 +28,7 @@
 from ..ctxsupport.ctxrunner import get_user_variables
 from ..definitions import UPDATE_BROKERS, UPDATE_TOPIC
 from .db import DamnitDB
+from .sandboxing import Bubblewrap
 
 
 
@@ -92,7 +94,7 @@ def loop():
 
 def extract_in_subprocess(
         proposal, run, out_path, cluster=False, run_data=RunData.ALL, match=(),
-        python_exe=None, mock=False, tee_output=None
+        python_exe=None, mock=False, tee_output=None, sandbox=True,
 ):
     if not python_exe:
         python_exe = sys.executable
@@ -106,6 +108,24 @@ def extract_in_subprocess(
     for m in match:
         args.extend(['--match', m])
 
+    if sandbox:
+        bubblewrap = Bubblewrap()
+        with contextlib.suppress(Exception):
-        with contextlib.suppress(Exception):
+        with contextlib.suppress(FileNotFoundError):
-        with contextlib.suppress(Exception):
+        with contextlib.suppress(FileNotFoundError):
+            bubblewrap.add_bind_proposal(proposal)
+        bubblewrap.add_bind_venv(Path(sys.executable))
+        if python_exe and Path(sys.executable) != Path(python_exe):
+            bubblewrap.add_bind_venv(Path(python_exe))
+        bubblewrap.add_bind(Path(__file__).parents[1] / 'ctxsupport') # ctxsupport_dir
+        bubblewrap.add_bind(out_path.parent.absolute())
+        # NOTE: do not bind mount in the file path, this mount is done via inodes so if
+        # the file is updated by being overwritten then the mounted version will not be
+        # in sync
+        bubblewrap.add_bind(Path.cwd().absolute())
+        # TODO: done to get both the context file and the DB into the container, should
+        # be changed to work off of their actual paths instead of assuming it's in cwd
+
+        args = bubblewrap.build_command(args)
+
     with TemporaryDirectory() as td:
         # Save a separate copy of the reduced data, so we can send an update
         # with only the variables that we've extracted.
@@ -217,7 +237,8 @@ def add_to_db(reduced_data, db: sqlite3.Connection, proposal, run):
 class Extractor:
     _proposal = None
 
-    def __init__(self):
+    def __init__(self, sandbox=True):
+        self.sandbox = sandbox
         self.db = DamnitDB()
         self.kafka_prd = KafkaProducer(
             bootstrap_servers=UPDATE_BROKERS,
@@ -266,6 +287,7 @@ def extract_and_ingest(self, proposal, run, cluster=False,
         reduced_data = extract_in_subprocess(
             proposal, run, out_path, cluster=cluster, run_data=run_data,
             match=match, python_exe=python_exe, mock=mock, tee_output=tee_output,
+            sandbox=self.sandbox
         )
         log.info("Reduced data has %d fields", len(reduced_data))
         add_to_db(reduced_data, self.db.conn, proposal, run)
@@ -285,6 +307,9 @@ def extract_and_ingest(self, proposal, run, cluster=False,
             for m in match:
                 python_cmd.extend(["--match", m])
 
+            if not self.sandbox:
+                python_cmd.append("--no-sandbox")
+
             res = subprocess.run([
                 'sbatch', '--parsable',
                 *self.slurm_options(),
@@ -305,9 +330,9 @@ def proposal_runs(proposal):
     return set(int(p.stem[1:]) for p in raw_dir.glob("*"))
 
 
-def reprocess(runs, proposal=None, match=(), mock=False):
+def reprocess(runs, proposal=None, match=(), mock=False, sandbox=True):
     """Called by the 'amore-proto reprocess' subcommand"""
-    extr = Extractor()
+    extr = Extractor(sandbox=sandbox)
     if proposal is None:
         proposal = extr.proposal
 
@@ -376,6 +401,7 @@ def reprocess(runs, proposal=None, match=(), mock=False):
     ap.add_argument('run', type=int)
     ap.add_argument('run_data', choices=('raw', 'proc', 'all'))
     ap.add_argument('--cluster-job', action="store_true")
+    ap.add_argument('--no-sandbox', action="store_true")
     ap.add_argument('--match', action="append", default=[])
     args = ap.parse_args()
     logging.basicConfig(level=logging.INFO,
@@ -389,7 +415,7 @@ def reprocess(runs, proposal=None, match=(), mock=False):
         log.info("Extracting cluster variables in Slurm job %s on %s",
                  os.environ.get('SLURM_JOB_ID', '?'), socket.gethostname())
 
-    Extractor().extract_and_ingest(args.proposal, args.run,
+    Extractor(sandbox=not args.no_sandbox).extract_and_ingest(args.proposal, args.run,
                                    cluster=args.cluster_job,
                                    run_data=RunData(args.run_data),
                                    match=args.match)
diff --git a/damnit/backend/listener.py b/damnit/backend/listener.py
@@ -67,7 +67,8 @@ def watch_processes_finish(q: queue.Queue):
 
 class EventProcessor:
 
-    def __init__(self, context_dir=Path('.')):
+    def __init__(self, sandbox: bool, context_dir=Path('.')):
+        self.sandbox = sandbox
         self.context_dir = context_dir
         self.db = DamnitDB.from_dir(context_dir)
         # Fail fast if read-only - https://stackoverflow.com/a/44707371/434217
@@ -147,22 +148,33 @@ def handle_event(self, record, msg: dict, run_data: RunData):
 
         with log_path.open('ab') as logf:
             # Create subprocess to process the run
-            extract_proc = subprocess.Popen([
-                sys.executable, '-m', 'damnit.backend.extract_data',
-                str(proposal), str(run), run_data.value
-            ], cwd=self.context_dir, stdout=logf, stderr=subprocess.STDOUT)
+            cmd = [sys.executable, '-m', 'damnit.backend.extract_data', str(proposal),
+                   str(run), run_data.value]
+
+            if not self.sandbox:
+                cmd.append('--no-sandbox')
+
+            extract_proc = subprocess.Popen(
+                cmd, cwd=self.context_dir, stdout=logf, stderr=subprocess.STDOUT
+            )
         self.extract_procs_queue.put((proposal, run, extract_proc))
 
-def listen():
+def listen(sandbox: bool):
     # Set up logging to a file
     file_handler = logging.FileHandler("amore.log")
     formatter = logging.root.handlers[0].formatter
     file_handler.setFormatter(formatter)
     logging.root.addHandler(file_handler)
 
     log.info(f"Running on {platform.node()} under user {getpass.getuser()}, PID {os.getpid()}")
+
+    if sandbox:
+        log.info("Sandboxing of processes enabled")
+    else:
+        log.warning("Sandboxing disabled")
+
     try:
-        with EventProcessor() as processor:
+        with EventProcessor(sandbox=sandbox) as processor:
             processor.run()
     except KeyboardInterrupt:
         log.error("Stopping on Ctrl + C")
@@ -176,6 +188,3 @@ def listen():
     # can start the backend).
     if os.stat("amore.log").st_uid == os.getuid():
         os.chmod("amore.log", 0o666)
-
-if __name__ == '__main__':
-    listen()
diff --git a/damnit/backend/sandboxing.py b/damnit/backend/sandboxing.py
@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+from pathlib import Path
+import shlex
+import subprocess
+
+from extra_data.read_machinery import find_proposal
+
+
+class Bubblewrap:
+    """A class representing a sandbox environment using Bubblewrap.
+
+    Bubblewrap is a sandboxing tool that creates a restricted environment for processes,
+    this class provides methods to configure and build a bubblewrap sandbox for running
+    a context file such that it only has access to data from the relevant proposal.
+
+    Attributes:
+        command (list[str]): The base command for running in the sandbox.
+        command_binds (list[tuple[str, str]]): List of bind mounts commands.
+    """
+
+    def __init__(self):
+        self.command = [
+            "bwrap",
+            "--die-with-parent",  # Kill sandbox if parent process dies
+            "--unshare-all",  # Unshare all namespaces
+            "--share-net",  # Share network namespace
+            "--dev", "/dev",  # Bind mount /dev
+            "--bind", "/tmp", "/tmp",  # Bind mount tmp in
+            "--dir", "/gpfs",  # Create empty directory at /gpfs
+        ]
+
+        self.command_binds: list[tuple[str, str, str]] = []
+
+        for path in (
+            "/bin",
+            "/etc/resolv.conf",
+            "/lib",
+            "/lib64",
+            "/sbin",
+            "/usr",
+        ):
+            self.add_bind(Path(path), ro=True)
+
+        if Path("/gpfs/exfel/sw/software").exists():
+            self.add_bind(Path("/gpfs/exfel/sw/software"), ro=True)
+
+    def add_bind(
+        self, source: Path, dest: Path | None = None, ro: bool = False
+    ) -> None:
+        """Adds a bind mount to the sandbox.
+
+        !!! warning
+
+            Bind mounts are done on inodes, if a program updates a file mounted into the
+            sandbox by overwriting it then the inode changes and the file will not be
+            in sync between host/sandbox. To avoid this mount the parent directory.
+
+        Args:
+            source (Path): The source path to be bind mounted.
+            dest (Path, optional): The destination path in the sandbox. If not provided, the source path is used.
+            ro (bool, optional): Whether the bind mount should be read-only. Defaults to False.
+
+        Raises:
+            ValueError: If the source path is not absolute.
+        """
+        if not source.is_absolute():
+            raise ValueError("Source path must be absolute")
+
+        if dest is None:
+            dest = source
+
+        self.command_binds.append(
+            (
+                f"--{'ro-' if ro else ''}bind",
+                shlex.quote(str(source)),
+                shlex.quote(str(dest)),
+            )
+        )
+
+    def add_bind_proposal(self, proposal_id: int) -> None:
+        """Adds bind mounts for a proposal directory and its contents.
+
+        Args:
+            proposal_id (int): The ID of the proposal.
+
+        Raises:
+            FileNotFoundError: If the proposal directory is not found.
+        """
+        proposal_dir = Path(find_proposal(f"p{proposal_id:06d}"))
+
+        self.add_bind(proposal_dir)
+
+        for path in proposal_dir.iterdir():
+            self.add_bind(path.resolve())
+
+    def add_bind_venv(self, python_exec: Path) -> None:
+        """Adds all paths required by a virtual environment to the sandbox.
+
+        This function will use the given python executable to first call `sys.prefix` to
+        check if the executable is in a venv, if it is then `sysconfig.get_paths()` is
+        used to find required paths and add them paths as read-only binds.
+
+        Args:
+            python_exec (Path): The path to the Python executable.
+
+        Raises:
+            subprocess.CalledProcessError: If the command to get the virtual environment paths fails.
+        """
+        venv = subprocess.check_output(
+            [python_exec, "-c", "import sys; print(sys.prefix != sys.base_prefix)"]
+        ).decode("utf-8")
+
+        if venv == "False":
+            return
+
+        paths = subprocess.check_output(
+            [
+                python_exec,
+                "-c",
+                'import sysconfig; print(" ".join(v for v in sysconfig.get_paths().values()))',
+            ]
+        ).decode("utf-8")
+
+        for path in paths.split():
+            path = Path(path)
+            self.add_bind(path, ro=True)
+            if path.is_symlink():
+                self.add_bind(path.resolve(), ro=True)
+
+    def build_command(self, command: str | list[str]) -> list[str]:
+        """Builds the final command for running in the sandbox.
+
+        Args:
+            command (str or list[str]): The command to be executed in the sandbox.
+
+        Returns:
+            list[str]: The final command for running in the sandbox.
+        """
+        _command = self.command.copy()
+
+        for bind in self.command_binds:
+            _command.extend(bind)
+
+        _command.append("--")  # End of bubblewrap arguments
+
+        _command.extend(command if isinstance(command, list) else [command])
+
+        return _command
diff --git a/damnit/backend/supervisord.py b/damnit/backend/supervisord.py
@@ -80,7 +80,7 @@ def write_supervisord_conf(root_path):
     if config_path.stat().st_uid == os.getuid():
         os.chmod(config_path, 0o666)
 
-def start_backend(root_path: Path, try_again=True):
+def start_backend(root_path: Path, sandbox: bool, try_again=True):
     config_path = root_path / "supervisord.conf"
     if not config_path.is_file():
         write_supervisord_conf(root_path)
@@ -106,10 +106,10 @@ def start_backend(root_path: Path, try_again=True):
             return False
 
         if try_again:
-            return start_backend(root_path, try_again=False)
+            return start_backend(root_path, sandbox, try_again=False)
     elif rc == 3:
         # 3 means it's stopped and we need to start the program
-        cmd = subprocess.run([*supervisorctl, "start", "damnit"])
+        cmd = subprocess.run([*supervisorctl, "start", "damnit", "" if sandbox else "--no-sandbox"])
         if cmd.returncode != 0:
             log.error(f"Couldn't start supervisord, tried to run command: {' '.join(cmd)}\n"
                       f"Return code: {cmd.returncode}"
@@ -134,7 +134,7 @@ def start_backend(root_path: Path, try_again=True):
 
     return True
 
-def initialize_and_start_backend(root_path, proposal=None):
+def initialize_and_start_backend(root_path, sandbox = True, proposal=None):
     # Ensure the directory exists
     root_path.mkdir(parents=True, exist_ok=True)
     if root_path.stat().st_uid == os.getuid():
@@ -160,4 +160,4 @@ def initialize_and_start_backend(root_path, proposal=None):
         os.chmod(context_path, 0o666)
 
     # Start backend
-    return start_backend(root_path)
+    return start_backend(root_path, sandbox)
diff --git a/damnit/backend/test_listener.py b/damnit/backend/test_listener.py
@@ -55,9 +55,9 @@ def run(self):
                 log.error("Error processing event", exc_info=True)
 
 
-def listen():
+def listen(sandbox: bool):
     try:
-        with TestEventProcessor() as processor:
+        with TestEventProcessor(sandbox) as processor:
             processor.run()
     except KeyboardInterrupt:
         print("Stopping on Ctrl-C")