Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Bubblewrap implementation for sandboxing #153

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
79e4852
Add Bubblewrap implementation for sandboxing
RobertRosca Dec 10, 2023
588e504
Add tests for bubblewrap sandboxing
RobertRosca Dec 10, 2023
9ef250e
Remove redundant calls to `Path`
RobertRosca Dec 10, 2023
22d80af
Only bind gpfs sw if it exists
RobertRosca Dec 10, 2023
b47ccd0
Split bind mount string in two
RobertRosca Dec 10, 2023
4922106
Split bubblewrap args from command with explicit `--`
RobertRosca Dec 10, 2023
b7cee2a
Integrate bubblewrap with `extract_in_subprocess`
RobertRosca Dec 10, 2023
63867b9
Do not sandbox during fixture setup
RobertRosca Dec 10, 2023
c47ec31
Bind mount /tmp into bubblewrap
RobertRosca Dec 11, 2023
261a5df
Install bubblewrap in test environment
RobertRosca Dec 11, 2023
cc85af3
Add tests for read/write permissions/uid in sandbox
RobertRosca Dec 11, 2023
afbca2e
Add and propagate "--no-sandbox" flag
RobertRosca Dec 11, 2023
e614782
Explicitly bind out path in to bubblewrap
RobertRosca Dec 11, 2023
2bbaa01
Test propagation of `--no-sandbox`
RobertRosca Dec 11, 2023
c8b2731
Test bwrap call generated by `extract_in_subprocess`
RobertRosca Dec 11, 2023
1866b3a
Propagate sandbox flag to reprocess
RobertRosca Dec 11, 2023
6551b2c
Import from future for older python
RobertRosca Dec 11, 2023
f978ad1
Append no sandbox flag to command list
RobertRosca Dec 11, 2023
a1910a9
Mount in cwd for context/db, add note on file mounts
RobertRosca Dec 11, 2023
2a8033b
Add todo on mounting in the actual context file and db instead of cwd
RobertRosca Dec 11, 2023
8a7e81b
Fix reprocess `--no-sandbox` flag negation
RobertRosca Dec 20, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ jobs:
with:
packages: ${{ env.SYSTEM_PACKAGES }}

- name: Install bubblewrap
run: sudo apt update -y && sudo apt install bubblewrap -y

- uses: actions/cache@v3
with:
path: ~/.cache/pip
Expand Down
36 changes: 31 additions & 5 deletions damnit/backend/extract_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import contextlib
import getpass
import os
import logging
Expand Down Expand Up @@ -27,6 +28,7 @@
from ..ctxsupport.ctxrunner import get_user_variables
from ..definitions import UPDATE_BROKERS, UPDATE_TOPIC
from .db import DamnitDB
from .sandboxing import Bubblewrap



Expand Down Expand Up @@ -92,7 +94,7 @@ def loop():

def extract_in_subprocess(
proposal, run, out_path, cluster=False, run_data=RunData.ALL, match=(),
python_exe=None, mock=False, tee_output=None
python_exe=None, mock=False, tee_output=None, sandbox=True,
):
if not python_exe:
python_exe = sys.executable
Expand All @@ -106,6 +108,24 @@ def extract_in_subprocess(
for m in match:
args.extend(['--match', m])

if sandbox:
bubblewrap = Bubblewrap()
with contextlib.suppress(Exception):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
with contextlib.suppress(Exception):
with contextlib.suppress(FileNotFoundError):

Is that what we're trying to catch? Do we expect it to come up in real use, or only in testing?

bubblewrap.add_bind_proposal(proposal)
bubblewrap.add_bind_venv(Path(sys.executable))
if python_exe and Path(sys.executable) != Path(python_exe):
bubblewrap.add_bind_venv(Path(python_exe))
bubblewrap.add_bind(Path(__file__).parents[1] / 'ctxsupport') # ctxsupport_dir
JamesWrigley marked this conversation as resolved.
Show resolved Hide resolved
bubblewrap.add_bind(out_path.parent.absolute())
# NOTE: do not bind mount in the file path, this mount is done via inodes so if
# the file is updated by being overwritten then the mounted version will not be
# in sync
bubblewrap.add_bind(Path.cwd().absolute())
# TODO: done to get both the context file and the DB into the container, should
# be changed to work off of their actual paths instead of assuming it's in cwd

args = bubblewrap.build_command(args)

with TemporaryDirectory() as td:
# Save a separate copy of the reduced data, so we can send an update
# with only the variables that we've extracted.
Expand Down Expand Up @@ -217,7 +237,8 @@ def add_to_db(reduced_data, db: sqlite3.Connection, proposal, run):
class Extractor:
_proposal = None

def __init__(self):
def __init__(self, sandbox=True):
self.sandbox = sandbox
self.db = DamnitDB()
self.kafka_prd = KafkaProducer(
bootstrap_servers=UPDATE_BROKERS,
Expand Down Expand Up @@ -266,6 +287,7 @@ def extract_and_ingest(self, proposal, run, cluster=False,
reduced_data = extract_in_subprocess(
proposal, run, out_path, cluster=cluster, run_data=run_data,
match=match, python_exe=python_exe, mock=mock, tee_output=tee_output,
sandbox=self.sandbox
)
log.info("Reduced data has %d fields", len(reduced_data))
add_to_db(reduced_data, self.db.conn, proposal, run)
Expand All @@ -285,6 +307,9 @@ def extract_and_ingest(self, proposal, run, cluster=False,
for m in match:
python_cmd.extend(["--match", m])

if not self.sandbox:
python_cmd.append("--no-sandbox")

res = subprocess.run([
'sbatch', '--parsable',
*self.slurm_options(),
Expand All @@ -305,9 +330,9 @@ def proposal_runs(proposal):
return set(int(p.stem[1:]) for p in raw_dir.glob("*"))


def reprocess(runs, proposal=None, match=(), mock=False):
def reprocess(runs, proposal=None, match=(), mock=False, sandbox=True):
"""Called by the 'amore-proto reprocess' subcommand"""
extr = Extractor()
extr = Extractor(sandbox=sandbox)
if proposal is None:
proposal = extr.proposal

Expand Down Expand Up @@ -376,6 +401,7 @@ def reprocess(runs, proposal=None, match=(), mock=False):
ap.add_argument('run', type=int)
ap.add_argument('run_data', choices=('raw', 'proc', 'all'))
ap.add_argument('--cluster-job', action="store_true")
ap.add_argument('--no-sandbox', action="store_true")
ap.add_argument('--match', action="append", default=[])
args = ap.parse_args()
logging.basicConfig(level=logging.INFO,
Expand All @@ -389,7 +415,7 @@ def reprocess(runs, proposal=None, match=(), mock=False):
log.info("Extracting cluster variables in Slurm job %s on %s",
os.environ.get('SLURM_JOB_ID', '?'), socket.gethostname())

Extractor().extract_and_ingest(args.proposal, args.run,
Extractor(sandbox=not args.no_sandbox).extract_and_ingest(args.proposal, args.run,
cluster=args.cluster_job,
run_data=RunData(args.run_data),
match=args.match)
29 changes: 19 additions & 10 deletions damnit/backend/listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ def watch_processes_finish(q: queue.Queue):

class EventProcessor:

def __init__(self, context_dir=Path('.')):
def __init__(self, sandbox: bool, context_dir=Path('.')):
self.sandbox = sandbox
self.context_dir = context_dir
self.db = DamnitDB.from_dir(context_dir)
# Fail fast if read-only - https://stackoverflow.com/a/44707371/434217
Expand Down Expand Up @@ -147,22 +148,33 @@ def handle_event(self, record, msg: dict, run_data: RunData):

with log_path.open('ab') as logf:
# Create subprocess to process the run
extract_proc = subprocess.Popen([
sys.executable, '-m', 'damnit.backend.extract_data',
str(proposal), str(run), run_data.value
], cwd=self.context_dir, stdout=logf, stderr=subprocess.STDOUT)
cmd = [sys.executable, '-m', 'damnit.backend.extract_data', str(proposal),
str(run), run_data.value]

if not self.sandbox:
cmd.append('--no-sandbox')

extract_proc = subprocess.Popen(
cmd, cwd=self.context_dir, stdout=logf, stderr=subprocess.STDOUT
)
self.extract_procs_queue.put((proposal, run, extract_proc))

def listen():
def listen(sandbox: bool):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we move the sandbox setting to the databases metameta table? That way we can set it relatively easily with amore-proto db-config and we wouldn't have to restart the listener after changing it. It would also make it simpler to move to a centralized listener in the future and have different sandbox settings for different proposals.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm yeah, that was my first idea, but it depends on how 'secure' the configuration should be, since if it's in the database then users could enable/disable the sandboxing as easily as we could, unless the db is read only to the DAMNIT user, which would break a lot of things.

Keeping the setting as part of how the process is started means that the user running the listener could have sandboxing on, while others can still modify the DB/run reprocessing themselves.

I think options are:

  1. Make it part of the table with tables as read-only, enforces sandboxing for processes, but breaks user write permissions to the table.
  2. Make it part of the table, still read-write, risk of users disabling sandboxing.
  3. Some extra configuration file in the proposal directory/for the listener which is read-only to the DAMNIT user. This kind of config may end up being required as part of the move to a central listener anyway.
  4. Something set as part of the supervisor config?
  5. Leave it as a flag.

With all options ending in "for now" 😛

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd say lets go with option 2 for now, and later we can move it to the centralized listener settings (which should be inaccessible by users).

# Set up logging to a file
file_handler = logging.FileHandler("amore.log")
formatter = logging.root.handlers[0].formatter
file_handler.setFormatter(formatter)
logging.root.addHandler(file_handler)

log.info(f"Running on {platform.node()} under user {getpass.getuser()}, PID {os.getpid()}")

if sandbox:
log.info("Sandboxing of processes enabled")
else:
log.warning("Sandboxing disabled")

try:
with EventProcessor() as processor:
with EventProcessor(sandbox=sandbox) as processor:
processor.run()
except KeyboardInterrupt:
log.error("Stopping on Ctrl + C")
Expand All @@ -176,6 +188,3 @@ def listen():
# can start the backend).
if os.stat("amore.log").st_uid == os.getuid():
os.chmod("amore.log", 0o666)

if __name__ == '__main__':
listen()
149 changes: 149 additions & 0 deletions damnit/backend/sandboxing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
from __future__ import annotations

from pathlib import Path
import shlex
import subprocess

from extra_data.read_machinery import find_proposal


class Bubblewrap:
"""A class representing a sandbox environment using Bubblewrap.

Bubblewrap is a sandboxing tool that creates a restricted environment for processes,
this class provides methods to configure and build a bubblewrap sandbox for running
a context file such that it only has access to data from the relevant proposal.

Attributes:
command (list[str]): The base command for running in the sandbox.
command_binds (list[tuple[str, str]]): List of bind mounts commands.
"""

def __init__(self):
self.command = [
"bwrap",
"--die-with-parent", # Kill sandbox if parent process dies
"--unshare-all", # Unshare all namespaces
"--share-net", # Share network namespace
"--dev", "/dev", # Bind mount /dev
"--bind", "/tmp", "/tmp", # Bind mount tmp in
JamesWrigley marked this conversation as resolved.
Show resolved Hide resolved
"--dir", "/gpfs", # Create empty directory at /gpfs
]

self.command_binds: list[tuple[str, str, str]] = []

for path in (
"/bin",
"/etc/resolv.conf",
"/lib",
"/lib64",
"/sbin",
"/usr",
):
self.add_bind(Path(path), ro=True)

if Path("/gpfs/exfel/sw/software").exists():
self.add_bind(Path("/gpfs/exfel/sw/software"), ro=True)

def add_bind(
self, source: Path, dest: Path | None = None, ro: bool = False
) -> None:
"""Adds a bind mount to the sandbox.

!!! warning

Bind mounts are done on inodes, if a program updates a file mounted into the
sandbox by overwriting it then the inode changes and the file will not be
in sync between host/sandbox. To avoid this mount the parent directory.

Args:
source (Path): The source path to be bind mounted.
dest (Path, optional): The destination path in the sandbox. If not provided, the source path is used.
ro (bool, optional): Whether the bind mount should be read-only. Defaults to False.

Raises:
ValueError: If the source path is not absolute.
"""
if not source.is_absolute():
raise ValueError("Source path must be absolute")

if dest is None:
dest = source

self.command_binds.append(
(
f"--{'ro-' if ro else ''}bind",
shlex.quote(str(source)),
shlex.quote(str(dest)),
)
)

def add_bind_proposal(self, proposal_id: int) -> None:
"""Adds bind mounts for a proposal directory and its contents.

Args:
proposal_id (int): The ID of the proposal.

Raises:
FileNotFoundError: If the proposal directory is not found.
"""
proposal_dir = Path(find_proposal(f"p{proposal_id:06d}"))

self.add_bind(proposal_dir)

for path in proposal_dir.iterdir():
self.add_bind(path.resolve())

def add_bind_venv(self, python_exec: Path) -> None:
"""Adds all paths required by a virtual environment to the sandbox.

This function will use the given python executable to first call `sys.prefix` to
check if the executable is in a venv, if it is then `sysconfig.get_paths()` is
used to find required paths and add them paths as read-only binds.

Args:
python_exec (Path): The path to the Python executable.

Raises:
subprocess.CalledProcessError: If the command to get the virtual environment paths fails.
"""
venv = subprocess.check_output(
[python_exec, "-c", "import sys; print(sys.prefix != sys.base_prefix)"]
).decode("utf-8")

if venv == "False":
return
Comment on lines +114 to +115
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be surprising that this method is a complete no-op if the Python you point to isn't a venv. Especially if people use conda envs - they're technically not venvs, but I think people usually expect things to work in the same way.

Would it make sense to turn this into add_bind_python and try to do the right thing with the target Python, venv or not?

It could also start by adding the main env directory (i.e. the bit before bin/python) and only add extra paths if they're not under that; it should be the same, but a shorter command is easier to make sense of if we ever need to.

(Also, I think you'd actually end up with "False\n" here if the target is not a venv, so the check would fail. It might be worth using JSON to send details back, just to avoid fiddly details like this.)


paths = subprocess.check_output(
[
python_exec,
"-c",
'import sysconfig; print(" ".join(v for v in sysconfig.get_paths().values()))',
]
).decode("utf-8")

for path in paths.split():
path = Path(path)
self.add_bind(path, ro=True)
if path.is_symlink():
self.add_bind(path.resolve(), ro=True)

def build_command(self, command: str | list[str]) -> list[str]:
"""Builds the final command for running in the sandbox.

Args:
command (str or list[str]): The command to be executed in the sandbox.

Returns:
list[str]: The final command for running in the sandbox.
"""
_command = self.command.copy()

for bind in self.command_binds:
_command.extend(bind)

_command.append("--") # End of bubblewrap arguments

_command.extend(command if isinstance(command, list) else [command])

return _command
10 changes: 5 additions & 5 deletions damnit/backend/supervisord.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def write_supervisord_conf(root_path):
if config_path.stat().st_uid == os.getuid():
os.chmod(config_path, 0o666)

def start_backend(root_path: Path, try_again=True):
def start_backend(root_path: Path, sandbox: bool, try_again=True):
config_path = root_path / "supervisord.conf"
if not config_path.is_file():
write_supervisord_conf(root_path)
Expand All @@ -106,10 +106,10 @@ def start_backend(root_path: Path, try_again=True):
return False

if try_again:
return start_backend(root_path, try_again=False)
return start_backend(root_path, sandbox, try_again=False)
elif rc == 3:
# 3 means it's stopped and we need to start the program
cmd = subprocess.run([*supervisorctl, "start", "damnit"])
cmd = subprocess.run([*supervisorctl, "start", "damnit", "" if sandbox else "--no-sandbox"])
if cmd.returncode != 0:
log.error(f"Couldn't start supervisord, tried to run command: {' '.join(cmd)}\n"
f"Return code: {cmd.returncode}"
Expand All @@ -134,7 +134,7 @@ def start_backend(root_path: Path, try_again=True):

return True

def initialize_and_start_backend(root_path, proposal=None):
def initialize_and_start_backend(root_path, sandbox = True, proposal=None):
# Ensure the directory exists
root_path.mkdir(parents=True, exist_ok=True)
if root_path.stat().st_uid == os.getuid():
Expand All @@ -160,4 +160,4 @@ def initialize_and_start_backend(root_path, proposal=None):
os.chmod(context_path, 0o666)

# Start backend
return start_backend(root_path)
return start_backend(root_path, sandbox)
4 changes: 2 additions & 2 deletions damnit/backend/test_listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ def run(self):
log.error("Error processing event", exc_info=True)


def listen():
def listen(sandbox: bool):
try:
with TestEventProcessor() as processor:
with TestEventProcessor(sandbox) as processor:
processor.run()
except KeyboardInterrupt:
print("Stopping on Ctrl-C")
Expand Down
Loading