Skip to content

Commit

Permalink
Add monitoring (#118)
Browse files Browse the repository at this point in the history
Migrate over the monitoring role from sysadmin to devops.

Related to: #27
  • Loading branch information
hellais authored Dec 19, 2024
1 parent ab129d6 commit cd8f0c6
Show file tree
Hide file tree
Showing 25 changed files with 946 additions and 16 deletions.
12 changes: 12 additions & 0 deletions ansible/ansible-playbook
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
#
# ansible-playbook is a wrapper script used to send a notification to slack
# whenever a new ansible deploy is triggered.
#
# Flow:
#   1. Capture the full command line (script path + arguments) in
#      ANSIBLE_SLACK_CMD and export it for child processes.
#   2. Run the `notify-slack` role against localhost.
#   3. Hand over to the `ansible-playbook` found on PATH with the original,
#      untouched arguments.
#
# Because of `set -e`, a failing notification step aborts the script before
# the playbook is started.
set -ue

# %q shell-quotes every word so the recorded command stays unambiguous even
# when arguments contain spaces or other special characters.
ANSIBLE_SLACK_CMD=$(printf "%q " "$0" "$@")
ANSIBLE_SLACK_CMD="${ANSIBLE_SLACK_CMD% }" # strip trailing whitespace
export ANSIBLE_SLACK_CMD

ansible localhost --module-name include_role --args name=notify-slack

# exec replaces this shell, so signals and the exit status belong directly to
# the real ansible-playbook process.
exec ansible-playbook "$@"
7 changes: 7 additions & 0 deletions ansible/deploy-bootstrap.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# Applies the `bootstrap` role to every host in the inventory, with privilege
# escalation. The play is tagged `bootstrap` so it can be selected with --tags.
- name: Ensure all hosts are bootstrapped correctly
  hosts: all
  become: true
  roles:
    - bootstrap
  tags:
    - bootstrap
10 changes: 10 additions & 0 deletions ansible/deploy-monitoring-config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
# Configuration-only play for the monitoring host: applies the three
# prometheus-related roles, in the order listed, with privilege escalation.
- name: Update monitoring config
  hosts: monitoring.ooni.org
  become: true
  roles:
    - prometheus
    - prometheus_blackbox_exporter
    - prometheus_alertmanager
  tags: [monitoring]
10 changes: 5 additions & 5 deletions ansible/deploy-monitoring.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
---
- name: Update monitoring config
- name: Deploy monitoring host
hosts: monitoring.ooni.org
become: true
tags:
- monitoring
roles:
- prometheus
- prometheus_blackbox_exporter
- prometheus_alertmanager

- monitoring
vars:
monitoring_htpasswd: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/monitoring_htpasswd', profile='oonidevops_user_prod') }}"

- ansible.builtin.import_playbook: deploy-monitoring-config.yml
13 changes: 9 additions & 4 deletions ansible/inventory
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[all:children]
htz-fsn
ghs-ams
htz_fsn
ghs_ams

## Role tags

Expand All @@ -12,13 +12,18 @@ data3.htz-fsn.prod.ooni.nu

## Location tags

[htz-fsn]
[htz_fsn]
data.ooni.org
monitoring.ooni.org
notebook.ooni.org
data1.htz-fsn.prod.ooni.nu
data2.htz-fsn.prod.ooni.nu
data3.htz-fsn.prod.ooni.nu

[ghs-ams]
[ghs_ams]
openvpn-server1.ooni.io
amsmatomo.ooni.nu
db-1.proteus.ooni.io
ams-slack-1.ooni.org
#mia-echoth.ooni.nu
#mia-httpth.ooni.nu
9 changes: 2 additions & 7 deletions ansible/playbook.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
---
- name: Ensure all hosts are bootstrapped correctly
hosts: all
become: yes
roles:
- bootstrap
tags:
- bootstrap
- name: Include bootstrap playbook
ansible.builtin.import_playbook: deploy-bootstrap.yml

- name: Include tier0 playbook
ansible.builtin.import_playbook: deploy-tier0.yml
Expand Down
1 change: 1 addition & 0 deletions ansible/roles/monitoring/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Role default: log ingestion is switched off unless a play or inventory
# overrides this variable.
# NOTE(review): presumably gates the tasks in tasks/log-ingestion.yml —
# confirm against the role's tasks/main.yml.
enable_log_ingestion: false
56 changes: 56 additions & 0 deletions ansible/roles/monitoring/files/create_logs_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
-- Log table in the `default` database.
--
-- The upper-case / underscore-prefixed columns mirror the field names found in
-- journald's JSON export (e.g. `journalctl -o json`); the lower-case columns
-- at the end are additional fields.
--
-- * `date` and `rtdate` are ALIAS columns: they are not stored, but computed
--   on read from the microsecond timestamps `_SOURCE_REALTIME_TIMESTAMP` and
--   `__REALTIME_TIMESTAMP` respectively.
-- * `inserted_at` defaults to the insertion time.
-- * Rows are ordered by `__REALTIME_TIMESTAMP`.
--
-- `_GID` and `_UID` are plain UInt32: LowCardinality over small fixed-size
-- numeric types is rejected by a default-configured server unless
-- `allow_suspicious_low_cardinality_types` is enabled.
--
-- The statement is idempotent thanks to IF NOT EXISTS; an already existing
-- table is left untouched.
CREATE TABLE IF NOT EXISTS default.logs
(
    `CODE_FILE` String,
    `CODE_FUNC` String,
    `CODE_LINE` String,
    `INVOCATION_ID` String,
    `LOGGER` LowCardinality(String),
    `MESSAGE_ID` String,
    `MESSAGE` String,
    `PRIORITY` UInt8,
    `PROCESS_NAME` String,
    `SYSLOG_FACILITY` LowCardinality(String),
    `SYSLOG_IDENTIFIER` LowCardinality(String),
    `SYSLOG_PID` Nullable(UInt64),
    `SYSLOG_TIMESTAMP` String,
    `THREAD_NAME` String,
    `TID` UInt64,
    `UNIT` String,
    `_AUDIT_LOGINUID` Nullable(UInt64),
    `_AUDIT_SESSION` Nullable(UInt64),
    `_BOOT_ID` String,
    `_CAP_EFFECTIVE` String,
    `_CMDLINE` String,
    `_COMM` LowCardinality(String),
    `_EXE` LowCardinality(String),
    `_GID` UInt32,
    `_HOSTNAME` String,
    `_KERNEL_DEVICE` String,
    `_KERNEL_SUBSYSTEM` String,
    `_MACHINE_ID` String,
    `_PID` UInt32,
    `_SELINUX_CONTEXT` String,
    `_SOURCE_MONOTONIC_TIMESTAMP` Nullable(Int64),
    `_SOURCE_REALTIME_TIMESTAMP` Int64,
    `_STREAM_ID` String,
    `_SYSTEMD_CGROUP` LowCardinality(String),
    `_SYSTEMD_INVOCATION_ID` String,
    `_SYSTEMD_SLICE` String,
    `_SYSTEMD_UNIT` LowCardinality(String),
    `_TRANSPORT` LowCardinality(String),
    `_UDEV_SYSNAME` String,
    `_UID` UInt32,
    `__CURSOR` String,
    `__MONOTONIC_TIMESTAMP` Nullable(Int64),
    `__REALTIME_TIMESTAMP` Int64,
    `date` DateTime64(6) ALIAS fromUnixTimestamp64Micro(_SOURCE_REALTIME_TIMESTAMP),
    `host` LowCardinality(String),
    `inserted_at` DateTime DEFAULT now(),
    `message` String,
    `rtdate` DateTime64(6) ALIAS fromUnixTimestamp64Micro(__REALTIME_TIMESTAMP),
    `timestamp` String,
    INDEX timestamp_minmax_idx timestamp TYPE minmax GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY __REALTIME_TIMESTAMP
SETTINGS index_granularity = 8192;
17 changes: 17 additions & 0 deletions ansible/roles/monitoring/files/log-ingestion.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Streams the local journal into ClickHouse.
#
# `journalctl -ojson -f` follows the journal and emits one JSON object per
# entry; the output is piped into clickhouse-client, which inserts every line
# into the `logs` table as JSONEachRow. Unknown JSON fields are skipped and
# malformed rows are tolerated (allowed error ratio of 1).
[Unit]
Description=log ingestion

[Service]
ExecStart=/bin/sh -c 'journalctl -ojson -f | clickhouse-client --query="INSERT INTO logs FORMAT JSONEachRow" --input_format_skip_unknown_fields=1 --input_format_allow_errors_ratio=1'

# The pipeline ends as soon as either side exits (for example when the
# ClickHouse server is restarted). Bring it back automatically instead of
# silently stopping ingestion; the delay keeps a crash loop from tripping the
# default start rate limit.
Restart=always
RestartSec=5

# Sandboxing: deny syscall groups the pipeline has no use for and restrict
# access to devices, /tmp, home directories, system paths and kernel settings.
SystemCallFilter=~@clock @debug @cpu-emulation @keyring @module @mount @obsolete @raw-io @reboot @swap
NoNewPrivileges=yes
PrivateDevices=yes
PrivateTmp=yes
ProtectHome=yes
ProtectSystem=full
ProtectKernelModules=yes
ProtectKernelTunables=yes

[Install]
WantedBy=multi-user.target
85 changes: 85 additions & 0 deletions ansible/roles/monitoring/tasks/log-ingestion.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# # Vector
#
# Installs Vector, creates the ClickHouse table from create_logs_table.sql,
# provisions TLS material for /etc/vector, opens the firewall port and
# (re)starts the service. Every task carries the `vector` tag.

- name: vector - enable repo
  tags: vector
  ansible.builtin.shell: extrepo enable vector && extrepo update vector

- name: vector - install pkg
  tags: vector
  ansible.builtin.apt:
    # The repository was only just enabled, so the package index must be
    # refreshed before `vector` can be resolved. `cache_valid_time: 0` alone
    # does not trigger a refresh; `update_cache` does.
    update_cache: true
    name:
      - vector

- name: vector - deploy SQL file to create logs table
  tags: vector
  ansible.builtin.copy:
    src: create_logs_table.sql
    dest: /etc/clickhouse-server/create_logs_table.sql

- name: vector - create vector_logs table
  tags: vector
  ansible.builtin.command: clickhouse-client --multiline --multiquery --queries-file /etc/clickhouse-server/create_logs_table.sql

# The CA key is read from the vault and piped to openssl on stdin, so it is
# never written to disk in clear text. The generated files land in the
# current working directory of the controller.
- name: vector - Generate syslog certificates
  tags: vector
  # runs locally
  delegate_to: 127.0.0.1
  ansible.builtin.shell: |
    ./vault view files/pusher_ca.key.vault | openssl req -x509 -new -nodes -key /dev/stdin -sha256 -days 3650 -subj '/O=OONI/OU=CA/CN=ooni.org' -out oonicacert.pem
    openssl req -newkey rsa:2048 -nodes -days 3650 -keyout node.key -out node-req.pem -subj '/CN=ooni.org/O=OONI temp CA/C=US' -batch
    ./vault view files/pusher_ca.key.vault | openssl x509 -req -days 3650 -set_serial 01 -in node-req.pem -out node-cert.pem -CA oonicacert.pem -CAkey /dev/stdin
  register: certs_ready

# NOTE(review): the shell task above has no `changed_when`, so it reports
# "changed" on every run and this condition is effectively always true.
- name: vector - Copy TLS certs
  tags: vector
  ansible.builtin.copy:
    src: "{{ item }}"
    dest: /etc/vector/
    mode: '0440'
    owner: vector
  loop:
    - oonicacert.pem
    - node-cert.pem
    - node.key
  when: certs_ready.changed

# Remove the generated key material from the controller once it has been
# copied to the target host.
- name: vector - Delete files
  tags: vector
  # runs locally
  delegate_to: 127.0.0.1
  ansible.builtin.file:
    path: "{{ item }}"
    state: absent
  loop:
    - node-cert.pem
    - node-req.pem
    - node.key
    - oonicacert.pem

- name: vector - configure
  tags: vector
  ansible.builtin.template:
    src: templates/vector.toml
    dest: /etc/vector/vector.toml

- name: vector - open port
  tags: vector
  ansible.builtin.copy:
    src: templates/10514.nft
    dest: /etc/ooni/nftables/tcp/
  register: nft_reload_needed

# Only reload the firewall when the rule file actually changed.
- name: vector - reload nft
  tags: vector
  ansible.builtin.systemd:
    name: nftables.service
    state: reloaded
  when: nft_reload_needed.changed

- name: vector - restart service
  tags: vector
  ansible.builtin.systemd:
    daemon_reload: true
    enabled: true
    name: vector.service
    state: restarted
Loading

0 comments on commit cd8f0c6

Please sign in to comment.