From 99e908e33ec2a8f3cba9e765cd40ded86b740ef0 Mon Sep 17 00:00:00 2001
From: Damien Cupif
Date: Mon, 27 Nov 2023 18:33:08 +0100
Subject: [PATCH] feat(rds): add alerting on CA certificates expiration

This commit adds a new alert that will trigger if it detects any
instance with a CA certificate with an expiration date scheduled within
the next 15 days.
---
 .../RDSCACertificateCloseToExpiration.yml     | 41 +++++++++++
 charts/prometheus-rds-alerts/values.yaml      | 10 +++
 .../rds/RDSCACertificateCloseToExpiration.md  | 73 +++++++++++++++++++
 3 files changed, 124 insertions(+)
 create mode 100644 charts/prometheus-rds-alerts/prometheus_tests/RDSCACertificateCloseToExpiration.yml
 create mode 100644 content/runbooks/rds/RDSCACertificateCloseToExpiration.md

diff --git a/charts/prometheus-rds-alerts/prometheus_tests/RDSCACertificateCloseToExpiration.yml b/charts/prometheus-rds-alerts/prometheus_tests/RDSCACertificateCloseToExpiration.yml
new file mode 100644
index 0000000..bc75421
--- /dev/null
+++ b/charts/prometheus-rds-alerts/prometheus_tests/RDSCACertificateCloseToExpiration.yml
@@ -0,0 +1,41 @@
+rule_files:
+  - rules.yml
+
+evaluation_interval: 1m
+
+tests:
+
+  - name: RDSCACertificateCloseToExpiration
+    interval: 1d
+    input_series:
+      - series: 'rds_certificate_expiry_timestamp_seconds{aws_account_id="111111111111",aws_region="eu-west-3",dbidentifier="db1"}'
+        values: '1728000x40'  # 1728000 seconds = 20 days
+      - series: 'rds_certificate_expiry_timestamp_seconds{aws_account_id="111111111111",aws_region="eu-west-3",dbidentifier="db2"}'
+        values: '2629800x40'  # 2629800 seconds = 1 month
+      - series: 'rds_certificate_expiry_timestamp_seconds{aws_account_id="111111111111",aws_region="eu-west-1",dbidentifier="db1"}'
+        values: '1728000x40'  # 1728000 seconds = 20 days
+      - series: 'rds_certificate_expiry_timestamp_seconds{aws_account_id="222222222222",aws_region="eu-west-3",dbidentifier="db1"}'
+        values: '2629800x40'  # 2629800 seconds = 1 month
+    alert_rule_test:
+      - alertname: RDSCACertificateCloseToExpiration
+        eval_time: 4d  # 20-day certificates still have 16 days left (> 15 days): no alert expected
+        exp_alerts: []
+      - alertname: RDSCACertificateCloseToExpiration
+        eval_time: 6d  # 20-day certificates have 14 days left (<= 15 days): one alert per account/region
+        exp_alerts:
+          - exp_labels:
+              aws_account_id: "111111111111"  # quoted so the ID stays a string, not a YAML integer
+              aws_region: eu-west-3
+              severity: warning
+            exp_annotations:
+              description: "1 instance(s) of the AWS account ID=111111111111 in region=eu-west-3 use(s) a certificate with an expiration date inferior to 15 days"
+              summary: "RDS instance(s) use(s) a certificate with an expiration date inferior to 15 days"
+              runbook_url: "https://qonto.github.io/database-monitoring-framework/0.0.0/runbooks/rds/RDSCACertificateCloseToExpiration"
+          - exp_labels:
+              aws_account_id: "111111111111"
+              aws_region: eu-west-1
+              severity: warning
+            exp_annotations:
+              description: "1 instance(s) of the AWS account ID=111111111111 in region=eu-west-1 use(s) a certificate with an expiration date inferior to 15 days"
+              summary: "RDS instance(s) use(s) a certificate with an expiration date inferior to 15 days"
+              runbook_url: "https://qonto.github.io/database-monitoring-framework/0.0.0/runbooks/rds/RDSCACertificateCloseToExpiration"
diff --git a/charts/prometheus-rds-alerts/values.yaml b/charts/prometheus-rds-alerts/values.yaml
index 90ed6ce..bcfca4d 100644
--- a/charts/prometheus-rds-alerts/values.yaml
+++ b/charts/prometheus-rds-alerts/values.yaml
@@ -184,3 +184,13 @@ rules:
       description: "{{ $labels.dbidentifier }} has forced maintenance"
     pintComments:
       - disable promql/series
+
+  RDSCACertificateCloseToExpiration:
+    expr: |
+      # 1296000 seconds = 15 days
+      count by (aws_account_id, aws_region) (rds_certificate_expiry_timestamp_seconds - time() <= 1296000) > 0
+    labels:
+      severity: warning
+    annotations:
+      summary: "RDS instance(s) use(s) a certificate with an expiration date inferior to 15 days"
+      description: "{{ $value }} instance(s) of the AWS account ID={{ $labels.aws_account_id }} in region={{ $labels.aws_region }} use(s) a certificate with an expiration date inferior to 15 days"
diff --git
a/content/runbooks/rds/RDSCACertificateCloseToExpiration.md b/content/runbooks/rds/RDSCACertificateCloseToExpiration.md new file mode 100644 index 0000000..6ead6fb --- /dev/null +++ b/content/runbooks/rds/RDSCACertificateCloseToExpiration.md @@ -0,0 +1,73 @@ +--- +title: CA Certificate Close to Expiration +--- + +# RDSCACertificateCloseToExpiration + +## Meaning + +Alert is triggered when an RDS instance is detected using a CA certificate which is going to expire in less than 15 days. + +## Impact + +If the certificate is not renewed before expiration, all attempts to initiate an SSL/TLS connection to the RDS instance will fail. + +{{< hint warning >}} +**Important** + +The `Amazon RDS Root 2019 CA` certificate expires on **Aug 22 17:08:50 2024 UTC**. + +- Starting January 25th 2024, RDS instances created without specifying the CA will use `rds-ca-rsa2048-g1`. +- In August 2024, AWS will enforce the CA rotation on all RDS instances still using the expiring CA during a maintenance window +{{< /hint >}} + +## Diagnosis + +- Identify the instance(s) concerned by either: + - opening the `RDS instances` dashboard + - or using the following AWS CLI command + + ```bash + aws rds describe-db-instances | jq ' + [ + .DBInstances[] | + { + db_instance_identifier: .DBInstanceIdentifier, + ca_certificate_identifier: .CACertificateIdentifier, + ca_certificate_valid_until: .CertificateDetails.ValidTill + } | + (now + 1296000) as $date | + select ( + (.ca_certificate_valid_until | split("+")[0] + "Z" | fromdate) < $date + ) + ]' + ``` + + Note: `1296000` seconds = 15 days + +## Mitigation + +Renew your certificate for the instances retrieved above by running: + +```bash +aws rds modify-db-instance \ + --db-instance-identifier <db-instance-identifier> \ + --ca-certificate-identifier <ca-certificate-identifier> +``` + +Use the `--apply-immediately` flag if you wish to change the certificate immediately, otherwise it will apply during your next scheduled maintenance window. 
+ +{{< hint info >}} +**Tips** + +We recommend using the `rds-ca-rsa2048-g1` certificate authority which: + +- Has the same properties as `rds-ca-2019` (2048-bit private key, SHA256 signing algorithm), so there is no risk of incompatibility +- Is valid until 2061 +- Can be applied without restarting the instances +{{< /hint >}} + +## Additional resources + +- [Using SSL with RDS](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/UsingWithRDS.SSL.html) +- [SSL Certificate Rotation](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/UsingWithRDS.SSL-certificate-rotation.html)