From d9689a058d0d6ec7e33bfed28b6bde59ebf3a535 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Wed, 13 Dec 2023 15:14:44 +0000 Subject: [PATCH 01/17] Make bennettbot services run in docker containers --- .gitignore | 1 + docker/Dockerfile | 152 ++++++++++++++++++++++++++++++++++ docker/build-dependencies.txt | 2 + docker/dependencies.txt | 5 ++ docker/docker-compose.yaml | 91 ++++++++++++++++++++ docker/entrypoints/check.sh | 8 ++ docker/entrypoints/dev.sh | 5 ++ docker/entrypoints/prod.sh | 5 ++ docker/justfile | 65 +++++++++++++++ 9 files changed, 334 insertions(+) create mode 100644 docker/Dockerfile create mode 100644 docker/build-dependencies.txt create mode 100644 docker/dependencies.txt create mode 100644 docker/docker-compose.yaml create mode 100755 docker/entrypoints/check.sh create mode 100755 docker/entrypoints/dev.sh create mode 100755 docker/entrypoints/prod.sh create mode 100644 docker/justfile diff --git a/.gitignore b/.gitignore index 75f09f96..52e38331 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ environment .env workspace/techsupport/ooo.json +gcp-credentials.json # System files *.py[co] diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 00000000..41a2b6c2 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,152 @@ +# syntax=docker/dockerfile:1.2 +################################################# +# +# Create base image with python installed. +# +# DL3007 ignored because base-docker we specifically always want to build on +# the latest base image, by design. +# +# hadolint ignore=DL3007 +FROM ghcr.io/opensafely-core/base-docker:22.04 as base-python + +# we are going to use an apt cache on the host, so disable the default debian +# docker clean up that deletes that cache on every apt install +RUN rm -f /etc/apt/apt.conf.d/docker-clean + +# Add deadsnakes PPA for installing new Python versions +# ensure fully working base python3 installation +# see: https://gist.github.com/tiran/2dec9e03c6f901814f6d1e8dad09528e +# use space efficient utility from base image + +RUN --mount=type=cache,target=/var/cache/apt \ + echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu jammy main" > /etc/apt/sources.list.d/deadsnakes-ppa.list &&\ + /usr/lib/apt/apt-helper download-file 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf23c5a6cf475977595c89f51ba6932366a755776' /etc/apt/trusted.gpg.d/deadsnakes.asc + +# install any system dependencies +COPY docker/dependencies.txt /tmp/dependencies.txt +RUN --mount=type=cache,target=/var/cache/apt \ + /root/docker-apt-install.sh /tmp/dependencies.txt + +################################################## +# +# Build image +# +# Ok, now we have local base image with python and our system dependencies on. +# We'll use this as the base for our builder image, where we'll build and +# install any python packages needed. +# +# We use a separate, disposable build image to avoid carrying the build +# dependencies into the production image. +FROM base-python as builder + +# Install any system build dependencies +COPY docker/build-dependencies.txt /tmp/build-dependencies.txt +RUN --mount=type=cache,target=/var/cache/apt \ + /root/docker-apt-install.sh /tmp/build-dependencies.txt + +# Install everything in venv for isolation from system python libraries +RUN python3.11 -m venv /opt/venv +ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH" + +# The cache mount means a) /root/.cache is not in the image, and b) it's preserved +# between docker builds locally, for faster dev rebuild. +COPY requirements.prod.txt /tmp/requirements.prod.txt + +# DL3042: using cache mount instead +# DL3013: we always want latest pip/setuptools/wheel, at least for now +# hadolint ignore=DL3042,DL3013 +RUN --mount=type=cache,target=/root/.cache \ + python -m pip install -U pip setuptools wheel && \ + python -m pip install --require-hashes --requirement /tmp/requirements.prod.txt + + +################################################## +# +# Base project image +# +# Ok, we've built everything we need, build an image with all dependencies but +# no code. +# +# Not including the code at this stage has two benefits: +# +# 1) this image only rebuilds when the handlful of files needed to build output-explorer-base +# changes. If we do `COPY . /app` now, this will rebuild when *any* file changes. +# +# 2) Ensures we *have* to mount the volume for dev image, as there's no embedded +# version of the code. Otherwise, we could end up accidentally using the +# version of the code included when the prod image was built. +FROM base-python as bennettbot-base + +# copy venv over from builder image. These will have root:root ownership, but +# are readable by all. +COPY --from=builder /opt/venv /opt/venv + +# Ensure we're using the venv by default +ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH" + +RUN mkdir -p /app +WORKDIR /app + +# This may not be necessary, but it probably doesn't hurt +ENV PYTHONPATH=/app + + +################################################## +# +# Production image +# +# Copy code in, add proper metadata +FROM bennettbot-base as bennettbot-prod + +# Dokku is using port 7000 rather than the default 5000 +# We need to expose it here so that the dokku checks will use the correct port +EXPOSE 7000 + +# Adjust this metadata to fit project. Note that the base-docker image does set +# some basic metadata. +LABEL org.opencontainers.image.title="bennettbot" \ + org.opencontainers.image.description="bennettbot" \ + org.opencontainers.image.source="https://github.com/ebmdatalab/ebmbot" + +# copy application code +COPY . /app + + +# We set command rather than entrypoint, to make it easier to run different +# things from the cli +CMD ["/app/docker/entrypoints/prod.sh"] + +# finally, tag with build information. These will change regularly, therefore +# we do them as the last action. +ARG BUILD_DATE=unknown +LABEL org.opencontainers.image.created=$BUILD_DATE +ARG GITREF=unknown +LABEL org.opencontainers.image.revision=$GITREF + +ARG USERID=10001 +ARG GROUPID=10001 +USER ${USERID}:${GROUPID} + +################################################## +# +# Dev image +# +# Now we build a dev image from our output-explorer-base image. This is basically +# installing dev dependencies and changing the entrypoint +# +FROM bennettbot-base as bennettbot-dev + +# install development requirements +COPY requirements.dev.txt /tmp/requirements.dev.txt +# using cache mount instead +# hadolint ignore=DL3042 +RUN --mount=type=cache,target=/root/.cache \ + python -m pip install --require-hashes --requirement /tmp/requirements.dev.txt + +# Override ENTRYPOINT rather than CMD so we can pass arbitrary commands to the entrypoint script +ENTRYPOINT ["/app/docker/entrypoints/dev.sh"] + +# Run as non root user. Required when building image. +ARG USERID +ARG GROUPID +USER ${USERID}:${GROUPID} diff --git a/docker/build-dependencies.txt b/docker/build-dependencies.txt new file mode 100644 index 00000000..4d8c9e21 --- /dev/null +++ b/docker/build-dependencies.txt @@ -0,0 +1,2 @@ +# list ubuntu packges needed to build dependencies, one per line +python3.11-dev diff --git a/docker/dependencies.txt b/docker/dependencies.txt new file mode 100644 index 00000000..2ba51fe3 --- /dev/null +++ b/docker/dependencies.txt @@ -0,0 +1,5 @@ +# list ubuntu packages needed in production, one per line +python3.11 +python3.11-distutils +python3.11-venv +sqlite3 diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml new file mode 100644 index 00000000..fddddc35 --- /dev/null +++ b/docker/docker-compose.yaml @@ -0,0 +1,91 @@ +# note: we do not run prod service with docker-compose, we use it just for +# configuring the production build +services: + prod: + # image name, both locally and public + image: bennettbot + build: + context: .. + # path relative to context + dockerfile: docker/Dockerfile + # the prod stage in the Dockerfile + target: bennettbot-prod + # should speed up the build in CI, where we have a cold cache + cache_from: # should speed up the build in CI, where we have a cold cache + - ghcr.io/opensafely-core/base-docker + - ghcr.io/ebmdatalab/bennettbot + args: + # this makes the image work for later cache_from: usage + - BUILDKIT_INLINE_CACHE=1 + # env vars supplied by just + - BUILD_DATE + - GITREF + # use dockers builitin PID daemon + init: true + + bot: + extends: + service: prod + container_name: bennettbot-bot + command: python -m ebmbot.bot + + dispatcher: + extends: + service: prod + container_name: bennettbot-dispatcher + command: python -m ebmbot.dispatcher + + webserver: + extends: + service: prod + container_name: bennettbot-webserver + command: python -m ebmbot.webserver + # host:container ports: container port should match the port in WEBHOOK_ORIGIN + ports: + - "9999:9999" + + # main development service + dev: + extends: + service: prod + image: bennettbot-dev + env_file: ../.env + container_name: bennettbot-dev + build: + # the dev stage in the Dockerfile + target: bennettbot-dev + args: + # user developer uid:gid in dev + - USERID=${DEV_USERID:-1000} + - GROUPID=${DEV_GROUPID:-1000} + # paths relative to docker-compose.yaml file + volumes: + - ..:/app + + dev_bot: + extends: + service: dev + container_name: bennettbot-bot + command: python -m ebmbot.bot + + dev_dispatcher: + extends: + service: dev + container_name: bennettbot-dispatcher + command: python -m ebmbot.dispatcher + + dev_webserver: + extends: + service: dev + container_name: bennettbot-webserver + command: python -m ebmbot.webserver + # host:container ports: container port should match the port in WEBHOOK_ORIGIN + ports: + - "1234:1234" + + + test: + extends: + service: dev + container_name: bennettbot-test + command: pytest diff --git a/docker/entrypoints/check.sh b/docker/entrypoints/check.sh new file mode 100755 index 00000000..bb5c5716 --- /dev/null +++ b/docker/entrypoints/check.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -euo pipefail + +echo Running black +black --check . +echo Running ruff +ruff check . diff --git a/docker/entrypoints/dev.sh b/docker/entrypoints/dev.sh new file mode 100755 index 00000000..b469787f --- /dev/null +++ b/docker/entrypoints/dev.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -euo pipefail + +exec "$@" diff --git a/docker/entrypoints/prod.sh b/docker/entrypoints/prod.sh new file mode 100755 index 00000000..21c89944 --- /dev/null +++ b/docker/entrypoints/prod.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -euo pipefail + +just run bot diff --git a/docker/justfile b/docker/justfile new file mode 100644 index 00000000..99e7a637 --- /dev/null +++ b/docker/justfile @@ -0,0 +1,65 @@ +# Load .env files by default +set dotenv-load := true + +# enable modern docker build features +export DOCKER_BUILDKIT := "1" +export COMPOSE_DOCKER_CLI_BUILD := "1" + +export BIN := "/opt/venv/bin" + +export DEV_USERID := `id -u` +export DEV_GROUPID := `id -g` + +build env="dev": + #!/usr/bin/env bash + + # set build args for prod builds + export BUILD_DATE=$(date -u +'%y-%m-%dT%H:%M:%SZ') + export GITREF=$(git rev-parse --short HEAD) + + # build the thing + docker-compose build --pull {{ env }} + + +# run python checks +check: build + docker-compose run --rm dev /app/docker/entrypoints/check.sh + + +# run tests in docker container +test *args="": build + docker-compose run --rm test python -m pytest \ + --cov=. \ + --cov-report html \ + --cov-report term-missing:skip-covered {{ args }} + + +# run command in dev container +run-cmd *args="bash": build + docker-compose run dev {{ args }} + + +# run all services +run-all: build + docker-compose up --detach dev_bot + docker-compose up --detach dev_dispatcher + docker-compose up --detach dev_webserver + + +# stop all dev containers +stop-all: + docker-compose stop dev_bot + docker-compose stop dev_dispatcher + docker-compose stop dev_webserver + + +# stop all dev containers +rm-all: stop-all + docker-compose rm dev_bot + docker-compose rm dev_dispatcher + docker-compose rm dev_webserver + + +# exec command in existing dev container +exec *args="bash": + docker-compose exec dev {{ args }} From 81baeaa9631ebc628f0a1f8b538244cde1ee1373 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Wed, 13 Dec 2023 15:18:26 +0000 Subject: [PATCH 02/17] Add a check endpoint for the webserver --- ebmbot/webserver/__init__.py | 5 +++++ tests/webserver/conftest.py | 8 ++++++++ tests/webserver/test_callback.py | 7 ------- tests/webserver/test_check.py | 4 ++++ tests/webserver/test_github.py | 7 +------ 5 files changed, 18 insertions(+), 13 deletions(-) create mode 100644 tests/webserver/conftest.py create mode 100644 tests/webserver/test_check.py diff --git a/ebmbot/webserver/__init__.py b/ebmbot/webserver/__init__.py index 7ec3e7c3..84af1ea3 100644 --- a/ebmbot/webserver/__init__.py +++ b/ebmbot/webserver/__init__.py @@ -4,6 +4,11 @@ from .github import handle_github_webhook +def check(): + return "ok" + + app = Flask(__name__) +app.route("/check/", methods=["GET"])(check) app.route("/github//", methods=["POST"])(handle_github_webhook) app.route("/callback/", methods=["POST"])(handle_callback_webhook) diff --git a/tests/webserver/conftest.py b/tests/webserver/conftest.py new file mode 100644 index 00000000..021a417d --- /dev/null +++ b/tests/webserver/conftest.py @@ -0,0 +1,8 @@ +import pytest + +from ebmbot import webserver + + +@pytest.fixture() +def web_client(): + return webserver.app.test_client() diff --git a/tests/webserver/test_callback.py b/tests/webserver/test_callback.py index e1ce3b94..9d62f3e5 100644 --- a/tests/webserver/test_callback.py +++ b/tests/webserver/test_callback.py @@ -1,7 +1,5 @@ import pytest -from ebmbot import webserver - from ..assertions import assert_patched_slack_client_sends_messages from ..time_helpers import T @@ -9,11 +7,6 @@ pytestmark = pytest.mark.freeze_time(T(10)) -@pytest.fixture() -def web_client(): - return webserver.app.test_client() - - def test_with_valid_payload(web_client): url = "/callback/?channel=channel&thread_ts=1234567890.098765&token=1575976333.0:43dfc12afbe479453b7ad54bbca9250923d80d51" diff --git a/tests/webserver/test_check.py b/tests/webserver/test_check.py new file mode 100644 index 00000000..65dd51fc --- /dev/null +++ b/tests/webserver/test_check.py @@ -0,0 +1,4 @@ +def test_check(web_client): + url = "/check/" + rsp = web_client.get(url) + assert rsp.data.decode() == "ok" diff --git a/tests/webserver/test_github.py b/tests/webserver/test_github.py index ccc1bfb8..a2e85516 100644 --- a/tests/webserver/test_github.py +++ b/tests/webserver/test_github.py @@ -2,7 +2,7 @@ import pytest -from ebmbot import scheduler, webserver +from ebmbot import scheduler from ..assertions import assert_job_matches from ..time_helpers import T0, T @@ -12,11 +12,6 @@ pytestmark = pytest.mark.freeze_time(T0) -@pytest.fixture() -def web_client(): - return webserver.app.test_client() - - PAYLOAD_PR_CLOSED = '{"action": "closed", "pull_request": {"merged": true}}' PAYLOAD_PR_CLOSED_UNMERGED = '{"action": "closed", "pull_request": {"merged": false}}' PAYLOAD_PR_OPENED = '{"action": "opened", "pull_request": {}}' From 8319ca1f116c70400ed1cd955323314c01e36d84 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Wed, 13 Dec 2023 15:31:50 +0000 Subject: [PATCH 03/17] Remove old fabric deployment code --- bin/start_service.sh | 12 ---- deploy/systemd/app.ebmbot.bot.service | 13 ---- deploy/systemd/app.ebmbot.dispatcher.service | 12 ---- deploy/systemd/app.ebmbot.webserver.service | 12 ---- dotenv-sample | 2 +- ebmbot/settings.py | 1 + fabfile.py | 73 -------------------- 7 files changed, 2 insertions(+), 123 deletions(-) delete mode 100755 bin/start_service.sh delete mode 100644 deploy/systemd/app.ebmbot.bot.service delete mode 100644 deploy/systemd/app.ebmbot.dispatcher.service delete mode 100644 deploy/systemd/app.ebmbot.webserver.service delete mode 100644 fabfile.py diff --git a/bin/start_service.sh b/bin/start_service.sh deleted file mode 100755 index 0f6fa4d0..00000000 --- a/bin/start_service.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -# set -euo pipefail - -REPO_ROOT=$(dirname $(dirname $0)) -VIRTUALENV_PATH=$REPO_ROOT/venv/bin - -source "$VIRTUALENV_PATH/activate" - -export PYTHONPATH="$PYTHONPATH:$REPO_ROOT" - -python -m ebmbot.$1 diff --git a/deploy/systemd/app.ebmbot.bot.service b/deploy/systemd/app.ebmbot.bot.service deleted file mode 100644 index 2b9a1688..00000000 --- a/deploy/systemd/app.ebmbot.bot.service +++ /dev/null @@ -1,13 +0,0 @@ -[Unit] -Description=ebmbot slackbot for listening for Slack commands - -[Service] -User=ebmbot -ExecStart=/var/www/ebmbot/bin/start_service.sh bot -SyslogIdentifier=app.ebmbot.bot -Restart=always - # If the service restarts too quickly, we get 429s from the Slack API -RestartSec=60 - -[Install] -WantedBy=multi-user.target diff --git a/deploy/systemd/app.ebmbot.dispatcher.service b/deploy/systemd/app.ebmbot.dispatcher.service deleted file mode 100644 index 4beb0fdc..00000000 --- a/deploy/systemd/app.ebmbot.dispatcher.service +++ /dev/null @@ -1,12 +0,0 @@ -[Unit] -Description=ebmbot job dispatcher - -[Service] -User=ebmbot -ExecStart=/var/www/ebmbot/bin/start_service.sh dispatcher -SyslogIdentifier=app.ebmbot.bot -Restart=always -RestartSec=4 - -[Install] -WantedBy=multi-user.target diff --git a/deploy/systemd/app.ebmbot.webserver.service b/deploy/systemd/app.ebmbot.webserver.service deleted file mode 100644 index c3919470..00000000 --- a/deploy/systemd/app.ebmbot.webserver.service +++ /dev/null @@ -1,12 +0,0 @@ -[Unit] -Description=ebmbot webserver for listening for GitHub webhooks - -[Service] -User=ebmbot -ExecStart=/var/www/ebmbot/bin/start_service.sh webserver -SyslogIdentifier=app.ebmbot.webserver -Restart=always -RestartSec=4 - -[Install] -WantedBy=multi-user.target diff --git a/dotenv-sample b/dotenv-sample index acde9b89..2431f695 100644 --- a/dotenv-sample +++ b/dotenv-sample @@ -1,5 +1,5 @@ # See comments in ebmbot/settings.py -EBMBOT_PATH=$PATH:/var/www/ebmbot/venv/bin/ +EBMBOT_PATH=$PATH:/app/ LOGS_DIR=changeme SLACK_LOGS_CHANNEL=changeme SLACK_TECH_SUPPORT_CHANNEL=changeme diff --git a/ebmbot/settings.py b/ebmbot/settings.py index 8358b8bf..94baa5cf 100644 --- a/ebmbot/settings.py +++ b/ebmbot/settings.py @@ -7,6 +7,7 @@ env.read_env() APPLICATION_ROOT = Path(__file__).resolve().parent.parent +# If running in docker, this is /app/ EBMBOT_PATH = env.str("EBMBOT_PATH") DB_PATH = env.path("DB_PATH", default=APPLICATION_ROOT / "ebmbot.db") diff --git a/fabfile.py b/fabfile.py deleted file mode 100644 index 9f931327..00000000 --- a/fabfile.py +++ /dev/null @@ -1,73 +0,0 @@ -from fabric.api import abort, env, prefix, run, task -from fabric.context_managers import cd -from fabric.contrib.files import exists - - -env.forward_agent = True -env.colorize_errors = True - -env.hosts = ["smallweb1.ebmdatalab.net"] -env.user = "root" -env.path = "/var/www/ebmbot" - - -def make_directory(): - run(f"mkdir -p {env.path}") - - -def check_environment(): - environment_path = f"{env.path}/.env" - - if not exists(environment_path): - abort(f"Create {environment_path} before proceeding") - - -def create_venv(): - if not exists("venv"): - run("python3.8 -m venv venv") - - -def update_from_git(): - if not exists(".git"): - run("git clone -q git@github.com:ebmdatalab/ebmbot.git") - - run("git fetch --all") - run("git checkout --force origin/main") - - -def install_requirements(): - with prefix("source venv/bin/activate"): - run("pip install -q -r requirements.prod.txt") - - -def chown_everything(): - run(f"chown -R ebmbot:ebmbot {env.path}") - - -def set_up_systemd(): - for service in ["bot", "webserver", "dispatcher"]: - run( - f"ln -sf {env.path}/deploy/systemd/app.ebmbot.{service}.service /etc/systemd/system" - ) - run(f"sudo systemctl enable app.ebmbot.{service}.service") - - run("systemctl daemon-reload") - - -def restart_ebmbot(): - for service in ["bot", "webserver", "dispatcher"]: - run(f"systemctl restart app.ebmbot.{service}.service") - - -@task -def deploy(): - make_directory() - check_environment() - - with cd(env.path): - create_venv() - update_from_git() - install_requirements() - chown_everything() - set_up_systemd() - restart_ebmbot() From e88ded22c415d49ebf9abe836724b6c2bf84483d Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Wed, 13 Dec 2023 15:34:16 +0000 Subject: [PATCH 04/17] Check and test in docker --- .github/workflows/main.yml | 24 ++++++++++++++++++++++++ docker/docker-compose.yaml | 7 ++++++- docker/justfile | 8 ++++---- pyproject.toml | 3 ++- 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8510b3e1..dcefcfb2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -31,3 +31,27 @@ jobs: - name: Run tests run: | just test + + + docker-check: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: opensafely-core/setup-action@v1 + with: + install-just: true + - name: Check formatting, linting and import sorting on docker + run: just docker/check + + + docker-test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: opensafely-core/setup-action@v1 + with: + install-just: true + - name: Run tests in docker + run: just docker/test diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index fddddc35..8a1be706 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -1,5 +1,11 @@ # note: we do not run prod service with docker-compose, we use it just for # configuring the production build +# Environment variables: +# For dev services, env variables are set using the --env-file arg in the justfile +# For test, variables defined in pyproject.toml are used , so local and docker test +# runs use the same test env vars +# For prod, env vars are set manually in the dokku apps + services: prod: # image name, both locally and public @@ -49,7 +55,6 @@ services: extends: service: prod image: bennettbot-dev - env_file: ../.env container_name: bennettbot-dev build: # the dev stage in the Dockerfile diff --git a/docker/justfile b/docker/justfile index 99e7a637..0ef2b042 100644 --- a/docker/justfile +++ b/docker/justfile @@ -36,14 +36,14 @@ test *args="": build # run command in dev container run-cmd *args="bash": build - docker-compose run dev {{ args }} + docker-compose --env-file ../.env run dev {{ args }} # run all services run-all: build - docker-compose up --detach dev_bot - docker-compose up --detach dev_dispatcher - docker-compose up --detach dev_webserver + docker-compose --env-file ../.env up --detach dev_bot + docker-compose --env-file ../.env up --detach dev_dispatcher + docker-compose --env-file ../.env up --detach dev_webserver # stop all dev containers diff --git a/pyproject.toml b/pyproject.toml index 6b48c55f..9a3f593d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,7 +74,8 @@ env = [ "EBMBOT_WEBHOOK_SECRET=ebmbot_webhook_secret", "WEBHOOK_ORIGIN=http://localhost:9999", "EBMBOT_PATH=", - "GCP_CREDENTIALS_PATH=" + "GCP_CREDENTIALS_PATH=", + "DATA_TEAM_GITHUB_API_TOKEN=dummy-token" ] filterwarnings = [ "ignore:distutils Version classes are deprecated:DeprecationWarning:pytest_freezegun", From c2831a3da2bec9d237712048bd44da7ab1cc672c Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Thu, 14 Dec 2023 09:27:33 +0000 Subject: [PATCH 05/17] Add docker developers docs --- DEVELOPERS.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/DEVELOPERS.md b/DEVELOPERS.md index f91deae7..f4acd4cd 100644 --- a/DEVELOPERS.md +++ b/DEVELOPERS.md @@ -24,9 +24,14 @@ just # shortcut for just --list Set up a local development environment with: ``` -just dev_setup +just devenv ``` +This will create a virtual environment, install requurements and create a +`.env` file by copying `dotenv-sample`; update it as necessary with valid dev +values for environment variables. + + ### Set up a test slack workspace and app Create a [new slack workspace](https://slack.com/intl/en-gb/get-started#/createnew) to use for testing. @@ -34,15 +39,61 @@ Create a [new slack workspace](https://slack.com/intl/en-gb/get-started#/createn Follow the steps to [create a slack app with the required scopes](DEPLOY.md#creating-the-slack-app) and install it into your test workspace. + ### Set up environment -* Copy environment: `cp dotenv-sample .env` Edit `.env` with the slack app tokens etc for the test slack app. +## Run in docker + +### Build docker image + +This builds the dev image by default: + +``` +just docker/build +``` + +### Run checks + +``` +just docker/check` +``` + +### Run tests +``` +just docker/test +``` + +### Run all services + +Run all 3 services (bot, dispatcher and webserver) in separate docker +containers. + +``` +just docker/run-all +``` + +### Stop/remove containers + +Stop running service container: + +``` +just docker/stop-all +``` + +Stop running services and remove containers: + +``` +just docker/rm-all +``` + +## Run locally + ### Run checks -Run linter and formatted: +Run linter and formatter: ``` just check ``` From 9bd835186bc24698b136b37c753a0d27f09dc1b3 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Thu, 14 Dec 2023 15:13:10 +0000 Subject: [PATCH 06/17] Initial dokku deployment config and docs --- DEPLOY.md | 88 ++++++++++++++++++++++++++++++++++---- Procfile | 3 ++ docker/docker-compose.yaml | 22 ---------- ebmbot/DOKKU_SCALE | 3 ++ ebmbot/settings.py | 2 +- 5 files changed, 86 insertions(+), 32 deletions(-) create mode 100644 Procfile create mode 100644 ebmbot/DOKKU_SCALE diff --git a/DEPLOY.md b/DEPLOY.md index fc1ba5ae..e97fb067 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -1,9 +1,10 @@ -# Deployment +## Deployment -ebmbot is deployed on smallweb1 and is managed by systemd. +Deployment uses `dokku` and requires the environment variables defined in `dotenv-sample`. +It is deployed to our `dokku3` instance. -* To deploy: `fab deploy` -* To view logs: `sudo journalctl -u app.ebmbot.* -b -f` +It runs as a single dokku app named `bennettbot`, with multiple processes for each +service (bot, dispatcher and webserver) as defined in the `Procfile`. ## Creating the Slack app (This should only need to be done once). @@ -71,10 +72,27 @@ events. We need to add some more: If you update any scopes after installing the app, you'll need to reinstall it (slack will usually prompt for this). -### Set environment variables +## Configuring the dokku app +(This should only need to be done once). + +### Create app + +On dokku3, as the `dokku` user: + +```sh +dokku$ dokku apps:create bennettbot +``` + +### Create storage for sqlite db and logs +```sh +dokku$ mkdir -p /var/lib/dokku/data/storage/bennettbot/logs +dokku$ chown dokku:dokku /var/lib/dokku/data/storage/bennettbot +dokku$ dokku storage:mount bennettbot /var/lib/dokku/data/storage/bennettbot/:/storage +``` + +### Configure app environment variables -Copy `dotenv-sample` to `.env` and update with relevant environment variables. See also -comments in `ebmbot/settings.py`. +See also comments in `ebmbot/settings.py`. The following slack environment variables need to be set: - `SLACK_LOGS_CHANNEL`: channel where scheduled job notifications will be posted @@ -92,9 +110,61 @@ OpenPrescribing, and are configured at https://github.com/ebmdatalab/openprescri The following environment variable allows the bot to authenticate with Github to retrieve project information. -- `DATA_TEAM_GITHUB_API_TOKEN`: Note that this must be a classic PAT (not fine-grained) and - needs the `repo` and `read:project` scope +- `DATA_TEAM_GITHUB_API_TOKEN`: Note that this must be a classic PAT (not fine-grained) + and needs the `repo` and `read:project` scope This is the path to credentials for the gdrive@ebmdatalab.iam.gserviceaccount.com service account: - `GCP_CREDENTIALS_PATH` + +The path for logs; set this to a directory in the dokku mounted storage so the logs +persist outside of the containers. +- LOGS_DIR + +The path for the sqlite db file; set this to a file in the dokku mounted storage +- DB_PATH + +Set each env varible with: +```sh +dokku$ dokku config:set bennettbot ENVVAR_NAME=value +``` + +e.g. +```sh +dokku$ dokku config:set bennettbot LOGS_DIR=/storage/logs +dokku$ dokku config:set bennettbot DB_PATH=/storage/bennettbot.db +``` + +### Map port 9999 for incoming github hooks +https://dokku.com/docs/networking/port-management/ + +dokku ports:add bennettbot http:9999:9999 + + +## Deployment + +Merges to the `main` branch will trigger an auto-deploy via GitHub actions. + +Note this deploys by building the prod docker image (see `docker/docker-compose.yaml`) and using the dokku [git:from-image](https://dokku.com/docs/deployment/methods/git/#initializing-an-app-repository-from-a-docker-image) command. + + +### Manually deploying + +To deploy manually: + +``` +# build prod image locally +just docker/build prod + +# tag image and push +docker tag bennettbot ghcr.io/ebmdatalab/bennettbot:latest +docker push ghcr.io/ebmdatalab/bennettbot:latest + +# get the SHA for the latest image +SHA=$(docker inspect --format='{{index .RepoDigests 0}}' ghcr.io/ebmdatalab/bennettbot:latest) +``` + +On dokku3, as the `dokku` user: +``` +dokku$ dokku git:from-image bennettbot +``` diff --git a/Procfile b/Procfile new file mode 100644 index 00000000..ef0c66ef --- /dev/null +++ b/Procfile @@ -0,0 +1,3 @@ +bot: python -m ebmbot.bot +dispatcher: python -m ebmbot.dispatcher +web: python -m ebmbot.webserver diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 8a1be706..766a016c 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -29,27 +29,6 @@ services: # use dockers builitin PID daemon init: true - bot: - extends: - service: prod - container_name: bennettbot-bot - command: python -m ebmbot.bot - - dispatcher: - extends: - service: prod - container_name: bennettbot-dispatcher - command: python -m ebmbot.dispatcher - - webserver: - extends: - service: prod - container_name: bennettbot-webserver - command: python -m ebmbot.webserver - # host:container ports: container port should match the port in WEBHOOK_ORIGIN - ports: - - "9999:9999" - # main development service dev: extends: @@ -88,7 +67,6 @@ services: ports: - "1234:1234" - test: extends: service: dev diff --git a/ebmbot/DOKKU_SCALE b/ebmbot/DOKKU_SCALE new file mode 100644 index 00000000..83d6006f --- /dev/null +++ b/ebmbot/DOKKU_SCALE @@ -0,0 +1,3 @@ +web=1 +bot=1 +dispatcher=1 diff --git a/ebmbot/settings.py b/ebmbot/settings.py index 94baa5cf..bdeace58 100644 --- a/ebmbot/settings.py +++ b/ebmbot/settings.py @@ -8,7 +8,7 @@ APPLICATION_ROOT = Path(__file__).resolve().parent.parent # If running in docker, this is /app/ -EBMBOT_PATH = env.str("EBMBOT_PATH") +EBMBOT_PATH = env.str("EBMBOT_PATH", default=APPLICATION_ROOT) DB_PATH = env.path("DB_PATH", default=APPLICATION_ROOT / "ebmbot.db") WORKSPACE_DIR = env.path("WORKSPACE_DIR", default=APPLICATION_ROOT / "workspace") From 0dcae2e18ea54fb960c6457a6263b8919d3b0684 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Thu, 14 Dec 2023 18:59:24 +0000 Subject: [PATCH 07/17] Add app.json with dokku checks --- app.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 app.json diff --git a/app.json b/app.json new file mode 100644 index 00000000..bdeeea8a --- /dev/null +++ b/app.json @@ -0,0 +1,15 @@ +{ + "healthchecks": { + "web": [ + { + "type": "startup", + "name": "web check", + "description": "Checking if the app responds to the /check endpoint", + "path": "/check/", + "attempts": 10, + "wait": 30, + "timeout": 60 + } + ] + } + } From 1eeb7c5e401f4f85193679c37ea567cce039a626 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Fri, 15 Dec 2023 09:38:20 +0000 Subject: [PATCH 08/17] Downgrade python version for fabric3 When we've upgrade fabric3 to standard fabric on OP and fdaaa, we can do the same here. Until then we're stuck at 3.9 --- .github/workflows/main.yml | 4 ++-- docker/Dockerfile | 2 +- docker/build-dependencies.txt | 2 +- docker/dependencies.txt | 6 +++--- justfile | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index dcefcfb2..479f1e18 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: - uses: opensafely-core/setup-action@v1 with: install-just: true - python-version: "3.8" + python-version: "3.9" cache-dependency-path: requirements.*.txt - name: Check formatting, linting and import sorting run: just check @@ -26,7 +26,7 @@ jobs: - uses: opensafely-core/setup-action@v1 with: install-just: true - python-version: "3.8" + python-version: "3.9" cache-dependency-path: requirements.*.txt - name: Run tests run: | diff --git a/docker/Dockerfile b/docker/Dockerfile index 41a2b6c2..34ed984a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -45,7 +45,7 @@ RUN --mount=type=cache,target=/var/cache/apt \ /root/docker-apt-install.sh /tmp/build-dependencies.txt # Install everything in venv for isolation from system python libraries -RUN python3.11 -m venv /opt/venv +RUN python3.9 -m venv /opt/venv ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH" # The cache mount means a) /root/.cache is not in the image, and b) it's preserved diff --git a/docker/build-dependencies.txt b/docker/build-dependencies.txt index 4d8c9e21..c6e259bc 100644 --- a/docker/build-dependencies.txt +++ b/docker/build-dependencies.txt @@ -1,2 +1,2 @@ # list ubuntu packges needed to build dependencies, one per line -python3.11-dev +python3.9-dev diff --git a/docker/dependencies.txt b/docker/dependencies.txt index 2ba51fe3..a97346ca 100644 --- a/docker/dependencies.txt +++ b/docker/dependencies.txt @@ -1,5 +1,5 @@ # list ubuntu packages needed in production, one per line -python3.11 -python3.11-distutils -python3.11-venv +python3.9 +python3.9-distutils +python3.9-venv sqlite3 diff --git a/justfile b/justfile index a14734fb..deb183d3 100644 --- a/justfile +++ b/justfile @@ -5,7 +5,7 @@ export VIRTUAL_ENV := `echo ${VIRTUAL_ENV:-.venv}` export BIN := VIRTUAL_ENV + if os_family() == "unix" { "/bin" } else { "/Scripts" } export PIP := BIN + if os_family() == "unix" { "/python -m pip" } else { "/python.exe -m pip" } -export DEFAULT_PYTHON := if os_family() == "unix" { "python3.8" } else { "python" } +export DEFAULT_PYTHON := if os_family() == "unix" { "python3.9" } else { "python" } set dotenv-load := true From c618e19896a8b0bc276c537614d3e6d21abad9d7 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Fri, 15 Dec 2023 09:40:12 +0000 Subject: [PATCH 09/17] Get rid of unnecessary EBMBOT_PATH setting, which was breaking things in docker anyway --- docker/Dockerfile | 2 +- ebmbot/dispatcher.py | 2 +- ebmbot/settings.py | 2 -- pyproject.toml | 1 - 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 34ed984a..7f37ec8c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -89,7 +89,7 @@ WORKDIR /app # This may not be necessary, but it probably doesn't hurt ENV PYTHONPATH=/app - +ENV PATH=$PATH:/app ################################################## # diff --git a/ebmbot/dispatcher.py b/ebmbot/dispatcher.py index 93ff1d53..661a3a7b 100644 --- a/ebmbot/dispatcher.py +++ b/ebmbot/dispatcher.py @@ -122,7 +122,7 @@ def run_command(self): stderr=stderr, env={ "EBMBOT_CALLBACK_URL": self.callback_url, - "PATH": settings.EBMBOT_PATH or os.environ["PATH"], + "PATH": os.environ["PATH"], }, shell=True, ) diff --git a/ebmbot/settings.py b/ebmbot/settings.py index bdeace58..636853a1 100644 --- a/ebmbot/settings.py +++ b/ebmbot/settings.py @@ -7,8 +7,6 @@ env.read_env() APPLICATION_ROOT = Path(__file__).resolve().parent.parent -# If running in docker, this is /app/ -EBMBOT_PATH = env.str("EBMBOT_PATH", default=APPLICATION_ROOT) DB_PATH = env.path("DB_PATH", default=APPLICATION_ROOT / "ebmbot.db") WORKSPACE_DIR = env.path("WORKSPACE_DIR", default=APPLICATION_ROOT / "workspace") diff --git a/pyproject.toml b/pyproject.toml index 9a3f593d..d81c6e9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,6 @@ env = [ "GITHUB_WEBHOOK_SECRET=github_webhook_secret", "EBMBOT_WEBHOOK_SECRET=ebmbot_webhook_secret", "WEBHOOK_ORIGIN=http://localhost:9999", - "EBMBOT_PATH=", "GCP_CREDENTIALS_PATH=", "DATA_TEAM_GITHUB_API_TOKEN=dummy-token" ] From 1291272f84eb62658c8eee9839f7886a82dd30ee Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Wed, 3 Jan 2024 18:01:17 +0000 Subject: [PATCH 10/17] Update app user and group to match the user created in sysadmin repo --- docker/Dockerfile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 7f37ec8c..b912718b 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -123,8 +123,13 @@ LABEL org.opencontainers.image.created=$BUILD_DATE ARG GITREF=unknown LABEL org.opencontainers.image.revision=$GITREF -ARG USERID=10001 -ARG GROUPID=10001 +ARG USERID=10004 +ARG GROUPID=10004 + +# Jobs that run fab commands add fabfiles into the job's workspace, so need +# to own this folder +RUN chown -R ${USERID}:${GROUPID} /app/workspace + USER ${USERID}:${GROUPID} ################################################## From 734bbbd8202fd35402f9c9c2a6d0f3ac28ecca82 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Wed, 3 Jan 2024 18:00:55 +0000 Subject: [PATCH 11/17] Add ssh-client and docs for ssh setup --- DEPLOY.md | 54 ++++++++++++++++++++++++++++++++--------- docker/Dockerfile | 13 +++++++--- docker/dependencies.txt | 1 + 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/DEPLOY.md b/DEPLOY.md index e97fb067..22fb417d 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -77,19 +77,49 @@ usually prompt for this). ### Create app -On dokku3, as the `dokku` user: - ```sh -dokku$ dokku apps:create bennettbot +$ dokku apps:create bennettbot ``` ### Create storage for sqlite db and logs ```sh -dokku$ mkdir -p /var/lib/dokku/data/storage/bennettbot/logs -dokku$ chown dokku:dokku /var/lib/dokku/data/storage/bennettbot -dokku$ dokku storage:mount bennettbot /var/lib/dokku/data/storage/bennettbot/:/storage +$ mkdir -p /var/lib/dokku/data/storage/bennettbot/logs +$ dokku storage:mount bennettbot /var/lib/dokku/data/storage/bennettbot/:/storage ``` +### Set up the user + +This is done using the ansible playbook in `https://github.com/ebmdatalab/sysadmin/blob/main/infra`. + +See for more details. + +To run just the bennettbot tasks: + +```sh +just test dokku3 --tags bennettbot +``` +And if all looks OK: + +```sh +just apply dokku3 --tags bennettbot +``` + +This will create the ebmbot user on dokku3 and chown any mounted volumes. + + +### Create ssh key and mount ebmbot user's home directory + +Create an ssh key for the ebmbot user, in the usual $HOME/.ssh/ location. + +Mount the user's home directory into the app. + +```sh +$ dokku storage:mount bennettbot /home/ebmbot/:/home/ebmbot +``` + +Add the ebmbot user's key to any servers that it requires acces to +(i.e. any jobs that run `fab` commands). + ### Configure app environment variables See also comments in `ebmbot/settings.py`. @@ -119,20 +149,20 @@ service account: The path for logs; set this to a directory in the dokku mounted storage so the logs persist outside of the containers. -- LOGS_DIR +- `LOGS_DIR` The path for the sqlite db file; set this to a file in the dokku mounted storage -- DB_PATH +- `DB_PATH` Set each env varible with: ```sh -dokku$ dokku config:set bennettbot ENVVAR_NAME=value +$ dokku config:set bennettbot ENVVAR_NAME=value ``` e.g. ```sh -dokku$ dokku config:set bennettbot LOGS_DIR=/storage/logs -dokku$ dokku config:set bennettbot DB_PATH=/storage/bennettbot.db +$ dokku config:set bennettbot LOGS_DIR=/storage/logs +$ dokku config:set bennettbot DB_PATH=/storage/bennettbot.db ``` ### Map port 9999 for incoming github hooks @@ -166,5 +196,5 @@ SHA=$(docker inspect --format='{{index .RepoDigests 0}}' ghcr.io/ebmdatalab/benn On dokku3, as the `dokku` user: ``` -dokku$ dokku git:from-image bennettbot +$ dokku git:from-image bennettbot ``` diff --git a/docker/Dockerfile b/docker/Dockerfile index b912718b..d94037d3 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -98,9 +98,7 @@ ENV PATH=$PATH:/app # Copy code in, add proper metadata FROM bennettbot-base as bennettbot-prod -# Dokku is using port 7000 rather than the default 5000 -# We need to expose it here so that the dokku checks will use the correct port -EXPOSE 7000 +EXPOSE 9999 # Adjust this metadata to fit project. Note that the base-docker image does set # some basic metadata. @@ -111,9 +109,13 @@ LABEL org.opencontainers.image.title="bennettbot" \ # copy application code COPY . /app - # We set command rather than entrypoint, to make it easier to run different # things from the cli +# In production, dokku extract the Procfile and uses the commands defined +# there as the entrypoints. If we defined an ENTRYPOINT here, the commands +# in the Procfile would be passed as arguments to that ENTRYPOINT, which is +# not what we want +# https://dokku.com/docs/processes/process-management/#procfile CMD ["/app/docker/entrypoints/prod.sh"] # finally, tag with build information. These will change regularly, therefore @@ -126,6 +128,9 @@ LABEL org.opencontainers.image.revision=$GITREF ARG USERID=10004 ARG GROUPID=10004 +# Add the user by name, required for ssh +RUN useradd -u ${USERID} ebmbot + # Jobs that run fab commands add fabfiles into the job's workspace, so need # to own this folder RUN chown -R ${USERID}:${GROUPID} /app/workspace diff --git a/docker/dependencies.txt b/docker/dependencies.txt index a97346ca..8822de44 100644 --- a/docker/dependencies.txt +++ b/docker/dependencies.txt @@ -1,4 +1,5 @@ # list ubuntu packages needed in production, one per line +openssh-client python3.9 python3.9-distutils python3.9-venv From 700ad52bd64a72ac25619b59910428d1f3fc6acb Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Thu, 4 Jan 2024 10:28:18 +0000 Subject: [PATCH 12/17] Remove unused webserver callback url I am fairly confident that the only thing that used this callback url was a very old edition of cohort-extractor https://github.com/ebmdatalab/ebmbot/commit/22fba6e2019bbe5762a669aa2a87af938fc7db40 --- ebmbot/dispatcher.py | 30 ----------------- ebmbot/webserver/__init__.py | 2 -- ebmbot/webserver/callback.py | 47 -------------------------- tests/assertions.py | 13 -------- tests/job_configs.py | 3 -- tests/test_dispatcher.py | 34 ++----------------- tests/webserver/test_callback.py | 57 -------------------------------- 7 files changed, 2 insertions(+), 184 deletions(-) delete mode 100644 ebmbot/webserver/callback.py delete mode 100644 tests/webserver/test_callback.py diff --git a/ebmbot/dispatcher.py b/ebmbot/dispatcher.py index 661a3a7b..8a2d7386 100644 --- a/ebmbot/dispatcher.py +++ b/ebmbot/dispatcher.py @@ -8,7 +8,6 @@ import traceback from datetime import datetime, timezone from multiprocessing import Process -from urllib.parse import urlencode, urlparse, urlunparse import requests from slack_sdk import WebClient @@ -16,7 +15,6 @@ from . import job_configs, scheduler, settings from .bot import get_channels from .logger import logger -from .signatures import generate_hmac from .slack import notify_slack @@ -70,7 +68,6 @@ def __init__(self, slack_client, job_id, config): if self.python_file: self.python_file = self.cwd / self.python_file self.python_function = self.job_config["python_function"] - self.callback_url = self.build_callback_url() def start_job(self): """Start running the job in a new subprocess.""" @@ -98,7 +95,6 @@ def run_command(self): run_args=self.run_args, python_file=self.python_file, python_function=self.python_function, - callback_url=self.callback_url, cwd=self.cwd, stdout_path=self.stdout_path, stderr_path=self.stdout_path, @@ -121,7 +117,6 @@ def run_command(self): stdout=stdout, stderr=stderr, env={ - "EBMBOT_CALLBACK_URL": self.callback_url, "PATH": os.environ["PATH"], }, shell=True, @@ -220,31 +215,6 @@ def set_up_log_dir(self): self.stderr_path = self.log_dir / "stderr" self.log_dir.mkdir(parents=True, exist_ok=True) - def build_callback_url(self): - timestamp = str(time.time()) - hmac = generate_hmac( - timestamp.encode("utf8"), settings.EBMBOT_WEBHOOK_SECRET - ).decode("utf8") - querystring = urlencode( - { - "channel": self.job["channel"], - "thread_ts": self.job["thread_ts"], - "token": f"{timestamp}:{hmac}", - } - ) - parsed_url = urlparse(settings.WEBHOOK_ORIGIN) - - return urlunparse( - ( - parsed_url.scheme, # scheme - parsed_url.netloc, # host - "callback/", # path - "", # params - querystring, # query - "", # fragment - ) - ) - if __name__ == "__main__": logger.info("running ebmbot.dispatcher") diff --git a/ebmbot/webserver/__init__.py b/ebmbot/webserver/__init__.py index 84af1ea3..d2fe093c 100644 --- a/ebmbot/webserver/__init__.py +++ b/ebmbot/webserver/__init__.py @@ -1,6 +1,5 @@ from flask import Flask -from .callback import handle_callback_webhook from .github import handle_github_webhook @@ -11,4 +10,3 @@ def check(): app = Flask(__name__) app.route("/check/", methods=["GET"])(check) app.route("/github//", methods=["POST"])(handle_github_webhook) -app.route("/callback/", methods=["POST"])(handle_callback_webhook) diff --git a/ebmbot/webserver/callback.py b/ebmbot/webserver/callback.py deleted file mode 100644 index 26096bb2..00000000 --- a/ebmbot/webserver/callback.py +++ /dev/null @@ -1,47 +0,0 @@ -import json - -from flask import abort, request -from slack_sdk import WebClient - -from .. import settings -from ..signatures import InvalidHMAC, validate_hmac -from ..slack import notify_slack - - -def handle_callback_webhook(): - """Respond to callback webhook.""" - try: - channel = request.args["channel"] - thread_ts = request.args["thread_ts"] - token = request.args["token"] - except KeyError: - abort(400) - - try: - timestamp, signature = token.split(":", 1) - except ValueError: - abort(400) - - try: - validate_hmac( - timestamp.encode("utf8"), - settings.EBMBOT_WEBHOOK_SECRET, - signature.encode("utf8"), - max_age=settings.EBMBOT_WEBHOOK_TOKEN_TTL, - ) - except InvalidHMAC: - abort(403) - - try: - data = json.loads(request.data.decode()) - except json.decoder.JSONDecodeError: - abort(400) - try: - message = data["message"] - except KeyError: - abort(400) - - client = WebClient(token=settings.SLACK_BOT_TOKEN) - notify_slack(client, channel=channel, thread_ts=thread_ts, message_text=message) - - return "" diff --git a/tests/assertions.py b/tests/assertions.py index b5d419d1..9a964054 100644 --- a/tests/assertions.py +++ b/tests/assertions.py @@ -1,7 +1,3 @@ -from contextlib import contextmanager -from unittest.mock import patch - - def assert_job_matches(job, type_, args, channel, start_after, started_at): assert_subdict( { @@ -40,15 +36,6 @@ def assert_slack_client_sends_messages( assert "blocks" not in actual_call_kwargs[-1] -@contextmanager -def assert_patched_slack_client_sends_messages(messages_kwargs=None): - messages_kwargs = messages_kwargs or [] - with patch("slack_sdk.WebClient.chat_postMessage") as p: - yield - actual_call_kwargs = [call.kwargs for call in p.call_args_list] - check_slack_client_calls(actual_call_kwargs, messages_kwargs) - - def check_slack_client_calls(actual_call_kwargs_list, expected_messages_kwargs): assert len(expected_messages_kwargs) == len(actual_call_kwargs_list) for exp_call_kwargs, actual_call in zip( diff --git a/tests/job_configs.py b/tests/job_configs.py index d01d1c05..7386fc6b 100644 --- a/tests/job_configs.py +++ b/tests/job_configs.py @@ -23,9 +23,6 @@ "run_args_template": "cat poem", "report_success": False, }, - "job_to_test_callback": { - "run_args_template": "echo $EBMBOT_CALLBACK_URL", - }, "bad_job": { "run_args_template": "cat no-poem", }, diff --git a/tests/test_dispatcher.py b/tests/test_dispatcher.py index 962bbebc..98ab51e6 100644 --- a/tests/test_dispatcher.py +++ b/tests/test_dispatcher.py @@ -4,13 +4,10 @@ import pytest -from ebmbot import scheduler, settings, webserver +from ebmbot import scheduler, settings from ebmbot.dispatcher import JobDispatcher, run_once -from .assertions import ( - assert_patched_slack_client_sends_messages, - assert_slack_client_sends_messages, -) +from .assertions import assert_slack_client_sends_messages from .job_configs import config from .time_helpers import T0, TS, T @@ -202,33 +199,6 @@ def test_job_failure_when_command_not_found(mock_client): assert f.read() == "/bin/sh: 1: dog: not found\n" -def test_job_with_callback(mock_client): - log_dir = build_log_dir("test_job_to_test_callback") - - scheduler.schedule_job("test_job_to_test_callback", {}, "channel", TS, 0) - job = scheduler.reserve_job() - - do_job(mock_client.client, job) - assert_slack_client_sends_messages( - mock_client.recorder, - messages_kwargs=[ - {"channel": "logs", "text": "about to start"}, - {"channel": "channel", "text": "succeeded"}, - ], - ) - - with open(os.path.join(log_dir, "stdout")) as f: - url = f.read().strip() - - client = webserver.app.test_client() - - with assert_patched_slack_client_sends_messages( - messages_kwargs=[{"channel": "channel", "text": "Job done", "thread_ts": TS}] - ): - rsp = client.post(url, data='{"message": "Job done"}') - assert rsp.status_code == 200 - - def test_python_job_success(mock_client): log_dir = build_log_dir("test_good_python_job") diff --git a/tests/webserver/test_callback.py b/tests/webserver/test_callback.py deleted file mode 100644 index 9d62f3e5..00000000 --- a/tests/webserver/test_callback.py +++ /dev/null @@ -1,57 +0,0 @@ -import pytest - -from ..assertions import assert_patched_slack_client_sends_messages -from ..time_helpers import T - - -pytestmark = pytest.mark.freeze_time(T(10)) - - -def test_with_valid_payload(web_client): - url = "/callback/?channel=channel&thread_ts=1234567890.098765&token=1575976333.0:43dfc12afbe479453b7ad54bbca9250923d80d51" - - with assert_patched_slack_client_sends_messages( - messages_kwargs=[ - {"channel": "channel", "text": "Job done", "thread_ts": "1234567890.098765"} - ] - ): - rsp = web_client.post(url, data='{"message": "Job done"}') - assert rsp.status_code == 200 - - -@pytest.mark.parametrize("data", ['"message": "Job done"}', '{"mossage": "Job done"}']) -def test_with_invalid_payload(web_client, data): - url = "/callback/?channel=channel&thread_ts=1234567890.098765&token=1575976333.0:43dfc12afbe479453b7ad54bbca9250923d80d51" - - with assert_patched_slack_client_sends_messages(): - rsp = web_client.post(url, data=data) - assert rsp.status_code == 400 - - -@pytest.mark.parametrize( - "url", - [ - "/callback/?thread_ts=1234567890.098765&token=1575976333.0:43dfc12afbe479453b7ad54bbca9250923d80d51", # missing channel - "/callback/?channel=channel&token=1575976333.0:43dfc12afbe479453b7ad54bbca9250923d80d51", # missing thread_ts - "/callback/?channel=channel&thread_ts=1234567890.098765&", # missing token - "/callback/?channel=channel&thread_ts=1234567890.098765&token=1575976333.0", # invalid token - ], -) -def test_with_invalid_url(web_client, url): - with assert_patched_slack_client_sends_messages(): - rsp = web_client.post(url, data="Job done") - assert rsp.status_code == 400 - - -@pytest.mark.parametrize( - "url", - [ - "/callback/?channel=channel&thread_ts=1234567890.098765&token=1575976333.1:43dfc12afbe479453b7ad54bbca9250923d80d51", # invalid signature - "/callback/?channel=channel&thread_ts=1234567890.098765&token=1575976333.0:43dfc12afbe479453b7ad54bbca9250923d80d51", # expired token - ], -) -def test_with_invalid_auth(freezer, web_client, url): - freezer.move_to(T(60 * 60 + 1)) - with assert_patched_slack_client_sends_messages(): - rsp = web_client.post(url, data="Job done") - assert rsp.status_code == 403 From 128fc2fbf540d83d14149eacd8038acd5a0bfac8 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Thu, 4 Jan 2024 11:32:11 +0000 Subject: [PATCH 13/17] Add a setting for FAB_WORKSPACE_DIR so that directories created by job can live elsewhere This means that the non-root docker user doesn't need write access to files that exist only in the container. --- DEPLOY.md | 9 ++++++++- docker/Dockerfile | 4 ---- ebmbot/dispatcher.py | 2 +- ebmbot/job_configs.py | 22 +++++++++++++++++++++- ebmbot/settings.py | 7 +++++++ tests/test_job_configs.py | 7 +++++++ 6 files changed, 44 insertions(+), 7 deletions(-) diff --git a/DEPLOY.md b/DEPLOY.md index 22fb417d..ccc4f8c8 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -81,9 +81,10 @@ usually prompt for this). $ dokku apps:create bennettbot ``` -### Create storage for sqlite db and logs +### Create storage for sqlite db, logs and fabric job workspaces ```sh $ mkdir -p /var/lib/dokku/data/storage/bennettbot/logs +$ mkdir -p /var/lib/dokku/data/storage/bennettbot/workspace $ dokku storage:mount bennettbot /var/lib/dokku/data/storage/bennettbot/:/storage ``` @@ -154,6 +155,11 @@ persist outside of the containers. The path for the sqlite db file; set this to a file in the dokku mounted storage - `DB_PATH` +The path for workspaces that are created by the job (i.e. fabric jobs that fetch +the fabfile for running the commands). Set this to a directory in the dokku mounted +storage that the docker user will have write access to. +- `FAB_WORKSPACE_DIR`` + Set each env varible with: ```sh $ dokku config:set bennettbot ENVVAR_NAME=value @@ -163,6 +169,7 @@ e.g. ```sh $ dokku config:set bennettbot LOGS_DIR=/storage/logs $ dokku config:set bennettbot DB_PATH=/storage/bennettbot.db +$ dokku config:set bennettbot FAB_WORKSPACE_DIR=/storage/workspace ``` ### Map port 9999 for incoming github hooks diff --git a/docker/Dockerfile b/docker/Dockerfile index d94037d3..a2efaf42 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -131,10 +131,6 @@ ARG GROUPID=10004 # Add the user by name, required for ssh RUN useradd -u ${USERID} ebmbot -# Jobs that run fab commands add fabfiles into the job's workspace, so need -# to own this folder -RUN chown -R ${USERID}:${GROUPID} /app/workspace - USER ${USERID}:${GROUPID} ################################################## diff --git a/ebmbot/dispatcher.py b/ebmbot/dispatcher.py index 8a2d7386..c86e23a1 100644 --- a/ebmbot/dispatcher.py +++ b/ebmbot/dispatcher.py @@ -60,7 +60,7 @@ def __init__(self, slack_client, job_id, config): self.job_config = config["jobs"][self.job["type"]] self.namespace = self.job["type"].split("_")[0] - self.cwd = settings.WORKSPACE_DIR / self.namespace + self.cwd = config["workspace_dir"][self.namespace] / self.namespace self.fabfile_url = config["fabfiles"].get(self.namespace) escaped_args = {k: shlex.quote(v) for k, v in self.job["args"].items()} self.run_args = self.job_config["run_args_template"].format(**escaped_args) diff --git a/ebmbot/job_configs.py b/ebmbot/job_configs.py index aad600ca..24073618 100644 --- a/ebmbot/job_configs.py +++ b/ebmbot/job_configs.py @@ -30,12 +30,19 @@ * "fabfile": optional, the URL of a fabfile which is required to run commands in the namespace * "python_file": optional, the path to a python file within the namespace + * "workspace_dir": optional, the path to the directory where files related to + the namespace can be found. Defaults to ebmbot/workspace, which is the location + of job files that exist only within this repo. Can be set to another location for + jobs that fetch files and create a namespace workspace location for them + (e.g. fabric jobs). """ import re from operator import itemgetter +from ebmbot import settings + # fmt: off raw_config = { @@ -75,6 +82,7 @@ ], }, "fdaaa": { + "workspace_dir": settings.FAB_WORKSPACE_DIR, "fabfile": "https://raw.githubusercontent.com/ebmdatalab/clinicaltrials-act-tracker/master/fabfile.py", "jobs": { "deploy": { @@ -91,6 +99,7 @@ ], }, "op": { + "workspace_dir": settings.FAB_WORKSPACE_DIR, "fabfile": "https://raw.githubusercontent.com/ebmdatalab/openprescribing/main/fabfile.py", "jobs": { "deploy": { @@ -352,7 +361,14 @@ def build_config(raw_config): See test_job_configs for an example. """ - config = {"jobs": {}, "slack": [], "help": {}, "fabfiles": {}, "python_files": {}} + config = { + "jobs": {}, + "slack": [], + "help": {}, + "fabfiles": {}, + "python_files": {}, + "workspace_dir": {}, + } for namespace in raw_config: if "_" in namespace: # pragma: no cover @@ -390,6 +406,10 @@ def build_config(raw_config): if "python_file" in raw_config[namespace]: config["python_files"][namespace] = raw_config[namespace]["python_file"] + config["workspace_dir"][namespace] = raw_config[namespace].get( + "workspace_dir", settings.WORKSPACE_DIR + ) + config["slack"] = sorted(config["slack"], key=itemgetter("command")) for slack_config in config["slack"]: diff --git a/ebmbot/settings.py b/ebmbot/settings.py index 636853a1..adc7634e 100644 --- a/ebmbot/settings.py +++ b/ebmbot/settings.py @@ -9,7 +9,14 @@ APPLICATION_ROOT = Path(__file__).resolve().parent.parent DB_PATH = env.path("DB_PATH", default=APPLICATION_ROOT / "ebmbot.db") +# location of job workspaces that live in this repo WORKSPACE_DIR = env.path("WORKSPACE_DIR", default=APPLICATION_ROOT / "workspace") +# location of fabric jobs which don't have workspaces in this repo +# These jobs will fetch fabric files and create a folder in +# FAB_WORKSPACE_DIR. In production, we want this to be a location in a +# mounted volume in the dokku app, which the docker user has write access +# to, and not a location that only exists in the container +FAB_WORKSPACE_DIR = env.path("FAB_WORKSPACE_DIR", default=WORKSPACE_DIR) LOGS_DIR = env.path("LOGS_DIR") SLACK_LOGS_CHANNEL = env.str("SLACK_LOGS_CHANNEL") SLACK_TECH_SUPPORT_CHANNEL = env.str("SLACK_TECH_SUPPORT_CHANNEL") diff --git a/tests/test_job_configs.py b/tests/test_job_configs.py index babe4a48..42232b98 100644 --- a/tests/test_job_configs.py +++ b/tests/test_job_configs.py @@ -2,12 +2,14 @@ import pytest +from ebmbot import settings from ebmbot.job_configs import build_config def test_build_config(): raw_config = { "ns1": { + "workspace_dir": "/foo/", "jobs": { "good_job": {"run_args_template": "cat [poem]"}, "bad_job": {"run_args_template": "dog [poem]"}, @@ -142,6 +144,11 @@ def test_build_config(): }, "fabfiles": {}, "python_files": {"ns3": "jobs.py"}, + "workspace_dir": { + "ns1": "/foo/", + "ns2": settings.WORKSPACE_DIR, + "ns3": settings.WORKSPACE_DIR, + }, } From bde0d0f620c4b95d08149a2706a08b76a5e361ca Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Thu, 4 Jan 2024 12:35:00 +0000 Subject: [PATCH 14/17] Add an alias setting for the logs dir on the host for reporting location of log files when jobs fail --- DEPLOY.md | 6 +++++- ebmbot/dispatcher.py | 8 +++++--- ebmbot/settings.py | 3 +++ tests/test_dispatcher.py | 24 +++++++++++++++++++++++- 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/DEPLOY.md b/DEPLOY.md index ccc4f8c8..790e380f 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -151,6 +151,9 @@ service account: The path for logs; set this to a directory in the dokku mounted storage so the logs persist outside of the containers. - `LOGS_DIR` +Also set the alias for the logs dir to the location of the mounted volume on the host, +for error reporting +- `HOST_LOGS_DIR` The path for the sqlite db file; set this to a file in the dokku mounted storage - `DB_PATH` @@ -158,7 +161,7 @@ The path for the sqlite db file; set this to a file in the dokku mounted storage The path for workspaces that are created by the job (i.e. fabric jobs that fetch the fabfile for running the commands). Set this to a directory in the dokku mounted storage that the docker user will have write access to. -- `FAB_WORKSPACE_DIR`` +- `FAB_WORKSPACE_DIR` Set each env varible with: ```sh @@ -168,6 +171,7 @@ $ dokku config:set bennettbot ENVVAR_NAME=value e.g. ```sh $ dokku config:set bennettbot LOGS_DIR=/storage/logs +$ dokku config:set bennettbot HOST_LOGS_DIR=/var/lib/dokku/data/storage/bennettbot/logs $ dokku config:set bennettbot DB_PATH=/storage/bennettbot.db $ dokku config:set bennettbot FAB_WORKSPACE_DIR=/storage/workspace ``` diff --git a/ebmbot/dispatcher.py b/ebmbot/dispatcher.py index c86e23a1..76599539 100644 --- a/ebmbot/dispatcher.py +++ b/ebmbot/dispatcher.py @@ -8,6 +8,7 @@ import traceback from datetime import datetime, timezone from multiprocessing import Process +from pathlib import Path import requests from slack_sdk import WebClient @@ -162,7 +163,7 @@ def notify_end(self, rc): else: return else: - msg = f"Command `{self.job['type']}` failed (find logs in {self.log_dir}). Calling tech-support." + msg = f"Command `{self.job['type']}` failed (find logs in {self.host_log_dir} on dokku3). Calling tech-support." error = True slack_message = notify_slack( @@ -208,9 +209,10 @@ def update_fabfile(self): # pragma: no cover def set_up_log_dir(self): """Create directory for recording stdout/stderr.""" - timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S") - self.log_dir = settings.LOGS_DIR / self.job["type"] / timestamp + job_log_path = Path(self.job["type"]) / timestamp + self.log_dir = settings.LOGS_DIR / job_log_path + self.host_log_dir = settings.HOST_LOGS_DIR / job_log_path self.stdout_path = self.log_dir / "stdout" self.stderr_path = self.log_dir / "stderr" self.log_dir.mkdir(parents=True, exist_ok=True) diff --git a/ebmbot/settings.py b/ebmbot/settings.py index adc7634e..142d3b1e 100644 --- a/ebmbot/settings.py +++ b/ebmbot/settings.py @@ -18,6 +18,9 @@ # to, and not a location that only exists in the container FAB_WORKSPACE_DIR = env.path("FAB_WORKSPACE_DIR", default=WORKSPACE_DIR) LOGS_DIR = env.path("LOGS_DIR") +# An alias for logs dir; this is just used for reporting the host location of logs +# in slack, where the log dir is a mounted volume +HOST_LOGS_DIR = env.path("HOST_LOGS_DIR", LOGS_DIR) SLACK_LOGS_CHANNEL = env.str("SLACK_LOGS_CHANNEL") SLACK_TECH_SUPPORT_CHANNEL = env.str("SLACK_TECH_SUPPORT_CHANNEL") SLACK_BOT_TOKEN = env.str("SLACK_BOT_TOKEN") diff --git a/tests/test_dispatcher.py b/tests/test_dispatcher.py index 98ab51e6..867850d5 100644 --- a/tests/test_dispatcher.py +++ b/tests/test_dispatcher.py @@ -1,6 +1,7 @@ import json import os import shutil +from unittest.mock import patch import pytest @@ -186,7 +187,7 @@ def test_job_failure_when_command_not_found(mock_client): mock_client.recorder, messages_kwargs=[ {"channel": "logs", "text": "about to start"}, - {"channel": "channel", "text": "failed"}, + {"channel": "channel", "text": "failed (find logs in tests/logs/"}, # failed message url reposted to tech support channel (C0001 in the mock) {"channel": "C0001", "text": "http://test"}, ], @@ -199,6 +200,27 @@ def test_job_failure_when_command_not_found(mock_client): assert f.read() == "/bin/sh: 1: dog: not found\n" +@patch("ebmbot.settings.HOST_LOGS_DIR", "/host/logs/") +def test_job_failure_with_host_log_dirs_setting(mock_client): + log_dir = build_log_dir("test_bad_job") + + scheduler.schedule_job("test_bad_job", {}, "channel", TS, 0) + job = scheduler.reserve_job() + do_job(mock_client.client, job) + assert_slack_client_sends_messages( + mock_client.recorder, + messages_kwargs=[ + {"channel": "logs", "text": "about to start"}, + {"channel": "channel", "text": "failed (find logs in /host/logs/"}, + # failed message url reposted to tech support channel (C0001 in the mock) + {"channel": "C0001", "text": "http://test"}, + ], + ) + + with open(os.path.join(log_dir, "stderr")) as f: + assert f.read() == "cat: no-poem: No such file or directory\n" + + def test_python_job_success(mock_client): log_dir = build_log_dir("test_good_python_job") From 9c652e5d7d50d814a1f65aba9cd19d76d4b83500 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Thu, 4 Jan 2024 12:46:49 +0000 Subject: [PATCH 15/17] Add deploy step to github actions workflow --- .github/workflows/main.yml | 51 +++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 479f1e18..49f9ab3e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,15 +1,22 @@ --- name: CI +env: + IMAGE_NAME: bennettbot + PUBLIC_IMAGE_NAME: ghcr.io/ebmdatalab/bennettbot + REGISTRY: ghcr.io + SSH_AUTH_SOCK: /tmp/agent.sock + on: push: + workflow_dispatch: jobs: check: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: opensafely-core/setup-action@v1 with: install-just: true @@ -22,7 +29,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: opensafely-core/setup-action@v1 with: install-just: true @@ -37,7 +44,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: opensafely-core/setup-action@v1 with: install-just: true @@ -49,9 +56,45 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: opensafely-core/setup-action@v1 with: install-just: true - name: Run tests in docker run: just docker/test + + deploy: + needs: [check, test, docker-check, docker-test] + + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + if: github.ref == 'refs/heads/main' + + concurrency: deploy-production + + steps: + - uses: actions/checkout@v4 + - uses: opensafely-core/setup-action@v1 + with: + install-just: true + + - name: Build docker image + run: | + just docker/build prod + + - name: Publish docker image + run: | + echo ${{ secrets.GITHUB_TOKEN }} | docker login $REGISTRY -u ${{ github.actor }} --password-stdin + docker tag $IMAGE_NAME $PUBLIC_IMAGE_NAME:latest + docker push $PUBLIC_IMAGE_NAME:latest + + - name: Deploy image + run: | + ssh-agent -a $SSH_AUTH_SOCK > /dev/null + ssh-add - <<< "${{ secrets.DOKKU3_DEPLOY_SSH_KEY }}" + SHA=$(docker inspect --format='{{index .RepoDigests 0}}' $PUBLIC_IMAGE_NAME:latest) + ssh -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" dokku@dokku3.ebmdatalab.net git:from-image bennettbot $SHA From 28001d6df9deb52d5229bcedea80833fd6a8a5b7 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Fri, 5 Jan 2024 13:19:05 +0000 Subject: [PATCH 16/17] Update docs and Dockerfile typos --- DEPLOY.md | 11 +++++++---- docker/Dockerfile | 4 ++-- dotenv-sample | 1 - 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/DEPLOY.md b/DEPLOY.md index 790e380f..43be2566 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -1,7 +1,10 @@ ## Deployment -Deployment uses `dokku` and requires the environment variables defined in `dotenv-sample`. -It is deployed to our `dokku3` instance. +Deployment uses `dokku` and requires the environment variables defined +[below](#configure-app-environment-variables). + +It is deployed to our `dokku3` instance [automatically via GitHub actions](#deployment-via-github-actions). + It runs as a single dokku app named `bennettbot`, with multiple processes for each service (bot, dispatcher and webserver) as defined in the `Procfile`. @@ -118,7 +121,7 @@ Mount the user's home directory into the app. $ dokku storage:mount bennettbot /home/ebmbot/:/home/ebmbot ``` -Add the ebmbot user's key to any servers that it requires acces to +Add the ebmbot user's key to any servers that it requires access to (i.e. any jobs that run `fab` commands). ### Configure app environment variables @@ -182,7 +185,7 @@ https://dokku.com/docs/networking/port-management/ dokku ports:add bennettbot http:9999:9999 -## Deployment +## Deployment via GitHub Actions Merges to the `main` branch will trigger an auto-deploy via GitHub actions. diff --git a/docker/Dockerfile b/docker/Dockerfile index a2efaf42..fa6fd071 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -69,7 +69,7 @@ RUN --mount=type=cache,target=/root/.cache \ # # Not including the code at this stage has two benefits: # -# 1) this image only rebuilds when the handlful of files needed to build output-explorer-base +# 1) this image only rebuilds when the handlful of files needed to build bennettbot-base # changes. If we do `COPY . /app` now, this will rebuild when *any* file changes. # # 2) Ensures we *have* to mount the volume for dev image, as there's no embedded @@ -137,7 +137,7 @@ USER ${USERID}:${GROUPID} # # Dev image # -# Now we build a dev image from our output-explorer-base image. This is basically +# Now we build a dev image from our bennettbot-base image. This is basically # installing dev dependencies and changing the entrypoint # FROM bennettbot-base as bennettbot-dev diff --git a/dotenv-sample b/dotenv-sample index 2431f695..29a2ff7c 100644 --- a/dotenv-sample +++ b/dotenv-sample @@ -1,5 +1,4 @@ # See comments in ebmbot/settings.py -EBMBOT_PATH=$PATH:/app/ LOGS_DIR=changeme SLACK_LOGS_CHANNEL=changeme SLACK_TECH_SUPPORT_CHANNEL=changeme From 390eb74177281187f43c3dc01c061a061ff274ba Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Fri, 5 Jan 2024 13:08:30 +0000 Subject: [PATCH 17/17] Add a healthcheck to ensure bot is fully started up --- .gitignore | 1 + DEPLOY.md | 4 ++++ Procfile | 1 + app.json | 11 +++++++++++ bot_check.sh | 10 ++++++++++ ebmbot/bot.py | 22 +++++++++++++++++++++- ebmbot/settings.py | 6 ++++++ 7 files changed, 54 insertions(+), 1 deletion(-) create mode 100755 bot_check.sh diff --git a/.gitignore b/.gitignore index 52e38331..ad3169f4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ environment .env workspace/techsupport/ooo.json gcp-credentials.json +.bot_startup_check # System files *.py[co] diff --git a/DEPLOY.md b/DEPLOY.md index 43be2566..0e636fc9 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -166,6 +166,9 @@ the fabfile for running the commands). Set this to a directory in the dokku moun storage that the docker user will have write access to. - `FAB_WORKSPACE_DIR` +Path for file created after bot startup (used in the bot healthcheck in `app.json`). +- `BOT_CHECK_FILE` + Set each env varible with: ```sh $ dokku config:set bennettbot ENVVAR_NAME=value @@ -177,6 +180,7 @@ $ dokku config:set bennettbot LOGS_DIR=/storage/logs $ dokku config:set bennettbot HOST_LOGS_DIR=/var/lib/dokku/data/storage/bennettbot/logs $ dokku config:set bennettbot DB_PATH=/storage/bennettbot.db $ dokku config:set bennettbot FAB_WORKSPACE_DIR=/storage/workspace +$ dokku config:set bennettbot BOT_CHECK_FILE=/storage/.bot_startup_check ``` ### Map port 9999 for incoming github hooks diff --git a/Procfile b/Procfile index ef0c66ef..e77f575c 100644 --- a/Procfile +++ b/Procfile @@ -1,3 +1,4 @@ bot: python -m ebmbot.bot dispatcher: python -m ebmbot.dispatcher web: python -m ebmbot.webserver +release: rm -f /storage/.bot_startup_check diff --git a/app.json b/app.json index bdeeea8a..921ce9fb 100644 --- a/app.json +++ b/app.json @@ -10,6 +10,17 @@ "wait": 30, "timeout": 60 } + ], + "bot": [ + { + "type": "startup", + "name": "bot check", + "description": "Checking if the bot start up file exists", + "command": ["/app/bot_check.sh"], + "attempts": 10, + "wait": 30, + "timeout": 60 + } ] } } diff --git a/bot_check.sh b/bot_check.sh new file mode 100755 index 00000000..76f910ab --- /dev/null +++ b/bot_check.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +check_file_path=${BOT_CHECK_FILE:-.bot_startup_check} + +if test -f "$check_file_path"; then + echo "Start up checkfile exists: $check_file_path" +else + echo "Start up checkfile not found: $check_file_path" + exit 1 +fi diff --git a/ebmbot/bot.py b/ebmbot/bot.py index 030312d3..cd5f6e17 100644 --- a/ebmbot/bot.py +++ b/ebmbot/bot.py @@ -1,10 +1,12 @@ import random import re from datetime import datetime, timezone +from threading import Event from slack_bolt import App, BoltResponse from slack_bolt.adapter.socket_mode import SocketModeHandler from slack_bolt.error import BoltUnhandledRequestError +from slack_bolt.util.utils import get_boot_message from workspace.techsupport.jobs import get_dates_from_config as get_tech_support_dates @@ -12,6 +14,23 @@ from .logger import log_call, logger +class SocketModeCheckHandler(SocketModeHandler): + def start(self): # pragma: no cover + """ + Duplicates the behaviour of SocketModeHandler (establishes a new connection and then blocks the current thread to prevent the termination of this process). + + In addition, sets a file after connection that can used as a healthcheck to + ensure that the bot is fully running. In production, this file is removed in a dokku release task that runs pre-deploy. + """ + settings.BOT_CHECK_FILE.unlink(missing_ok=True) + + self.connect() + logger.info(get_boot_message()) + + settings.BOT_CHECK_FILE.touch() + Event().wait() + + def run(): # pragma: no cover """Start the bot running.""" app = App( @@ -20,13 +39,14 @@ def run(): # pragma: no cover # enable @app.error handler to catch the patterns we don't specifically handle raise_error_for_unhandled_request=True, ) - handler = SocketModeHandler(app, settings.SLACK_APP_TOKEN) + handler = SocketModeCheckHandler(app, settings.SLACK_APP_TOKEN) bot_user_id = get_bot_user_id(app.client) channels = get_channels(app.client) join_all_channels(app.client, channels, bot_user_id) register_listeners(app, job_configs.config, channels, bot_user_id) handler.start() + logger.info("Connected") def get_bot_user_id(client): diff --git a/ebmbot/settings.py b/ebmbot/settings.py index 142d3b1e..9c7568ce 100644 --- a/ebmbot/settings.py +++ b/ebmbot/settings.py @@ -28,6 +28,12 @@ SLACK_SIGNING_SECRET = env.str("SLACK_SIGNING_SECRET") SLACK_APP_USERNAME = env.str("SLACK_APP_USERNAME") +# Path to file created when the bot starts up. In production, this should be +# a path to a file in the mounted volume +BOT_CHECK_FILE = env.path( + "BOT_CHECK_FILE", default=APPLICATION_ROOT / ".bot_startup_check" +) + # Should match "Payload URL" from # https://github.com/ebmdatalab/openprescribing/settings/hooks/85994427 WEBHOOK_ORIGIN = env.str("WEBHOOK_ORIGIN")