-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Bubblewrap implementation for sandboxing #153
base: master
Are you sure you want to change the base?
Changes from all commits
79e4852
588e504
9ef250e
22d80af
b47ccd0
4922106
b7cee2a
63867b9
c47ec31
261a5df
cc85af3
afbca2e
e614782
2bbaa01
c8b2731
1866b3a
6551b2c
f978ad1
a1910a9
2a8033b
8a7e81b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -67,7 +67,8 @@ def watch_processes_finish(q: queue.Queue): | |
|
||
class EventProcessor: | ||
|
||
def __init__(self, context_dir=Path('.')): | ||
def __init__(self, sandbox: bool, context_dir=Path('.')): | ||
self.sandbox = sandbox | ||
self.context_dir = context_dir | ||
self.db = DamnitDB.from_dir(context_dir) | ||
# Fail fast if read-only - https://stackoverflow.com/a/44707371/434217 | ||
|
@@ -147,22 +148,33 @@ def handle_event(self, record, msg: dict, run_data: RunData): | |
|
||
with log_path.open('ab') as logf: | ||
# Create subprocess to process the run | ||
extract_proc = subprocess.Popen([ | ||
sys.executable, '-m', 'damnit.backend.extract_data', | ||
str(proposal), str(run), run_data.value | ||
], cwd=self.context_dir, stdout=logf, stderr=subprocess.STDOUT) | ||
cmd = [sys.executable, '-m', 'damnit.backend.extract_data', str(proposal), | ||
str(run), run_data.value] | ||
|
||
if not self.sandbox: | ||
cmd.append('--no-sandbox') | ||
|
||
extract_proc = subprocess.Popen( | ||
cmd, cwd=self.context_dir, stdout=logf, stderr=subprocess.STDOUT | ||
) | ||
self.extract_procs_queue.put((proposal, run, extract_proc)) | ||
|
||
def listen(): | ||
def listen(sandbox: bool): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we move the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm yeah, that was my first idea, but it depends on how 'secure' the configuration should be, since if it's in the database then users could enable/disable the sandboxing as easily as we could, unless the db is read only to the DAMNIT user, which would break a lot of things. Keeping the setting as part of how the process is started means that the user running the listener could have sandboxing on, while others can still modify the DB/run reprocessing themselves. I think options are:
With all options ending in "for now" 😛 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd say let's go with option 2 for now, and later we can move it to the centralized listener settings (which should be inaccessible by users). |
||
# Set up logging to a file | ||
file_handler = logging.FileHandler("amore.log") | ||
formatter = logging.root.handlers[0].formatter | ||
file_handler.setFormatter(formatter) | ||
logging.root.addHandler(file_handler) | ||
|
||
log.info(f"Running on {platform.node()} under user {getpass.getuser()}, PID {os.getpid()}") | ||
|
||
if sandbox: | ||
log.info("Sandboxing of processes enabled") | ||
else: | ||
log.warning("Sandboxing disabled") | ||
|
||
try: | ||
with EventProcessor() as processor: | ||
with EventProcessor(sandbox=sandbox) as processor: | ||
processor.run() | ||
except KeyboardInterrupt: | ||
log.error("Stopping on Ctrl + C") | ||
|
@@ -176,6 +188,3 @@ def listen(): | |
# can start the backend). | ||
if os.stat("amore.log").st_uid == os.getuid(): | ||
os.chmod("amore.log", 0o666) | ||
|
||
if __name__ == '__main__': | ||
listen() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
from __future__ import annotations | ||
|
||
from pathlib import Path | ||
import shlex | ||
import subprocess | ||
|
||
from extra_data.read_machinery import find_proposal | ||
|
||
|
||
class Bubblewrap: | ||
"""A class representing a sandbox environment using Bubblewrap. | ||
|
||
Bubblewrap is a sandboxing tool that creates a restricted environment for processes, | ||
this class provides methods to configure and build a bubblewrap sandbox for running | ||
a context file such that it only has access to data from the relevant proposal. | ||
|
||
Attributes: | ||
command (list[str]): The base command for running in the sandbox. | ||
command_binds (list[tuple[str, str]]): List of bind mounts commands. | ||
""" | ||
|
||
def __init__(self): | ||
self.command = [ | ||
"bwrap", | ||
"--die-with-parent", # Kill sandbox if parent process dies | ||
"--unshare-all", # Unshare all namespaces | ||
"--share-net", # Share network namespace | ||
"--dev", "/dev", # Bind mount /dev | ||
"--bind", "/tmp", "/tmp", # Bind mount tmp in | ||
JamesWrigley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"--dir", "/gpfs", # Create empty directory at /gpfs | ||
] | ||
|
||
self.command_binds: list[tuple[str, str, str]] = [] | ||
|
||
for path in ( | ||
"/bin", | ||
"/etc/resolv.conf", | ||
"/lib", | ||
"/lib64", | ||
"/sbin", | ||
"/usr", | ||
): | ||
self.add_bind(Path(path), ro=True) | ||
|
||
if Path("/gpfs/exfel/sw/software").exists(): | ||
self.add_bind(Path("/gpfs/exfel/sw/software"), ro=True) | ||
|
||
def add_bind( | ||
self, source: Path, dest: Path | None = None, ro: bool = False | ||
) -> None: | ||
"""Adds a bind mount to the sandbox. | ||
|
||
!!! warning | ||
|
||
Bind mounts are done on inodes, if a program updates a file mounted into the | ||
sandbox by overwriting it then the inode changes and the file will not be | ||
in sync between host/sandbox. To avoid this mount the parent directory. | ||
|
||
Args: | ||
source (Path): The source path to be bind mounted. | ||
dest (Path, optional): The destination path in the sandbox. If not provided, the source path is used. | ||
ro (bool, optional): Whether the bind mount should be read-only. Defaults to False. | ||
|
||
Raises: | ||
ValueError: If the source path is not absolute. | ||
""" | ||
if not source.is_absolute(): | ||
raise ValueError("Source path must be absolute") | ||
|
||
if dest is None: | ||
dest = source | ||
|
||
self.command_binds.append( | ||
( | ||
f"--{'ro-' if ro else ''}bind", | ||
shlex.quote(str(source)), | ||
shlex.quote(str(dest)), | ||
) | ||
) | ||
|
||
def add_bind_proposal(self, proposal_id: int) -> None: | ||
"""Adds bind mounts for a proposal directory and its contents. | ||
|
||
Args: | ||
proposal_id (int): The ID of the proposal. | ||
|
||
Raises: | ||
FileNotFoundError: If the proposal directory is not found. | ||
""" | ||
proposal_dir = Path(find_proposal(f"p{proposal_id:06d}")) | ||
|
||
self.add_bind(proposal_dir) | ||
|
||
for path in proposal_dir.iterdir(): | ||
self.add_bind(path.resolve()) | ||
|
||
def add_bind_venv(self, python_exec: Path) -> None: | ||
"""Adds all paths required by a virtual environment to the sandbox. | ||
|
||
This function will use the given python executable to first call `sys.prefix` to | ||
check if the executable is in a venv, if it is then `sysconfig.get_paths()` is | ||
used to find required paths and add them paths as read-only binds. | ||
|
||
Args: | ||
python_exec (Path): The path to the Python executable. | ||
|
||
Raises: | ||
subprocess.CalledProcessError: If the command to get the virtual environment paths fails. | ||
""" | ||
venv = subprocess.check_output( | ||
[python_exec, "-c", "import sys; print(sys.prefix != sys.base_prefix)"] | ||
).decode("utf-8") | ||
|
||
if venv == "False": | ||
return | ||
Comment on lines
+114
to
+115
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be surprising that this method is a complete no-op if the Python you point to isn't a venv. Especially if people use conda envs - they're technically not venvs, but I think people usually expect things to work in the same way. Would it make sense to turn this into It could also start by adding the main env directory (i.e. the bit before (Also, I think you'd actually end up with |
||
|
||
paths = subprocess.check_output( | ||
[ | ||
python_exec, | ||
"-c", | ||
'import sysconfig; print(" ".join(v for v in sysconfig.get_paths().values()))', | ||
] | ||
).decode("utf-8") | ||
|
||
for path in paths.split(): | ||
path = Path(path) | ||
self.add_bind(path, ro=True) | ||
if path.is_symlink(): | ||
self.add_bind(path.resolve(), ro=True) | ||
|
||
def build_command(self, command: str | list[str]) -> list[str]: | ||
"""Builds the final command for running in the sandbox. | ||
|
||
Args: | ||
command (str or list[str]): The command to be executed in the sandbox. | ||
|
||
Returns: | ||
list[str]: The final command for running in the sandbox. | ||
""" | ||
_command = self.command.copy() | ||
|
||
for bind in self.command_binds: | ||
_command.extend(bind) | ||
|
||
_command.append("--") # End of bubblewrap arguments | ||
|
||
_command.extend(command if isinstance(command, list) else [command]) | ||
|
||
return _command |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is that what we're trying to catch? Do we expect it to come up in real use, or only in testing?