diff --git a/ehrql/__main__.py b/ehrql/__main__.py index 286abdd7e..5663b0c4d 100644 --- a/ehrql/__main__.py +++ b/ehrql/__main__.py @@ -2,6 +2,7 @@ import logging import os import sys +import traceback import warnings from argparse import ArgumentParser, ArgumentTypeError, RawTextHelpFormatter from pathlib import Path @@ -109,6 +110,17 @@ def main(args, environ=None): # Handle errors encountered while reading user-supplied data print(f"{exc.__class__.__name__}: {exc}", file=sys.stderr) sys.exit(1) + except Exception as exc: + # For functions which take a `backend_class` give that class the chance to set + # the appropriate exit status for any errors + if backend_class := kwargs.get("backend_class"): + if exit_status := backend_class(environ).get_exit_status_for_exception( + exc + ): # pragma: no branch + # log the traceback to stderr to aid debugging + traceback.print_exc() + sys.exit(exit_status) + raise finally: root_logger.setLevel(orig_log_level) diff --git a/ehrql/backends/base.py b/ehrql/backends/base.py index c6126a0a4..d5866f02c 100644 --- a/ehrql/backends/base.py +++ b/ehrql/backends/base.py @@ -41,6 +41,9 @@ def modify_query_pre_reify(self, query): """ return query + def get_exit_status_for_exception(self, exception): + return None + class SQLBackend(BaseBackend): query_engine_class = None diff --git a/ehrql/backends/tpp.py b/ehrql/backends/tpp.py index dae008e16..050d4af65 100644 --- a/ehrql/backends/tpp.py +++ b/ehrql/backends/tpp.py @@ -126,6 +126,34 @@ def modify_query_variables(self, variables): ) return variables + def get_exit_status_for_exception(self, exception): + # Checking for "DatabaseError" in the MRO means we can identify database errors without + # referencing a specific driver. Both pymssql and presto/trino-python-client raise + # exceptions derived from a DatabaseError parent class + + # Ignore errors which don't look like database errors + if "DatabaseError" not in str(exception.__class__.mro()): + return + + # Exit with specific exit codes to help identify known issues + transient_errors = [ + "Unexpected EOF from the server", + "DBPROCESS is dead or not enabled", + ] + if any(message in str(exception) for message in transient_errors): + exception.add_note(f"\nIntermittent database error: {exception}") + return 3 + + if "Invalid object name 'CodedEvent_SNOMED'" in str(exception): + exception.add_note( + "\nCodedEvent_SNOMED table is currently not available.\n" + "This is likely due to regular database maintenance." + ) + return 4 + + exception.add_note(f"\nDatabase error: {exception}") + return 5 + allowed_patients = MappedTable( # This table has its name for historical reasons, and reads slightly oddly: it # should be interpreted as "allowed patients with regard to type one dissents" diff --git a/tests/acceptance/test_embedded_study.py b/tests/acceptance/test_embedded_study.py index 8c6549897..968b51de3 100644 --- a/tests/acceptance/test_embedded_study.py +++ b/tests/acceptance/test_embedded_study.py @@ -4,6 +4,7 @@ from ehrql.file_formats import FILE_FORMATS from tests.lib.fixtures import ( + database_operational_error_dataset_definition, invalid_dataset_attribute_dataset_definition, invalid_dataset_query_model_error_definition, no_dataset_attribute_dataset_definition, @@ -74,6 +75,20 @@ def test_parameterised_dataset_definition_with_bad_param(study, mssql_database, ) +def test_generate_dataset_with_database_error(study, mssql_database): + mssql_database.setup( + Patient(Patient_ID=1, DateOfBirth=datetime(1934, 5, 5)), + AllowedPatientsWithTypeOneDissent(Patient_ID=1), + ) + + # This dataset definition triggers an OperationalError by implementing date + # arithmetic that results in an out of bounds date (after 9999-12-31) + study.setup_from_string(database_operational_error_dataset_definition) + with pytest.raises(SystemExit) as err: + study.generate(mssql_database, "ehrql.backends.tpp.TPPBackend") + assert err.value.code == 5 + + def test_dump_dataset_sql_happy_path(study, mssql_database): study.setup_from_string(trivial_dataset_definition) study.dump_dataset_sql() diff --git a/tests/integration/backends/test_emis.py b/tests/integration/backends/test_emis.py index 2a4d06182..1896f7b58 100644 --- a/tests/integration/backends/test_emis.py +++ b/tests/integration/backends/test_emis.py @@ -1,6 +1,8 @@ from datetime import date, datetime +import pytest import sqlalchemy +from trino import exceptions as trino_exceptions from ehrql import create_dataset from ehrql.backends.emis import EMISBackend @@ -78,6 +80,28 @@ def get_all_backend_columns_with_types(trino_database): temp_table.drop(trino_database.engine()) +@pytest.mark.parametrize( + "exception", + [ + # These are trino errors that we may want to support in future with + # custom exit codes, but currently inherit from the base method + # Database errors + trino_exceptions.DatabaseError, + # OperationError is a subclass of DatabaseError + trino_exceptions.OperationalError, + # TrinoQueryError is encountered for over-complex/over-nested queries + trino_exceptions.TrinoQueryError, + # TrinoUserError is encountered for out of range numbers + trino_exceptions.TrinoUserError, + # TrinoUserError is encountered for bad/out of range dates + trino_exceptions.TrinoDataError, + ], +) +def test_backend_exceptions(exception): + backend = EMISBackend() + assert backend.get_exit_status_for_exception(exception) is None + + @register_test_for(emis.clinical_events) def test_clinical_events(select_all_emis): results = select_all_emis( diff --git a/tests/integration/backends/test_tpp.py b/tests/integration/backends/test_tpp.py index c36f52f97..7803fcd1b 100644 --- a/tests/integration/backends/test_tpp.py +++ b/tests/integration/backends/test_tpp.py @@ -3,6 +3,7 @@ import pytest import sqlalchemy +from pymssql import exceptions as pymssql_exceptions from ehrql import create_dataset from ehrql.backends.tpp import TPPBackend @@ -132,6 +133,48 @@ def get_all_backend_columns_with_types(mssql_database): yield table, column, column_type, column_args +@pytest.mark.parametrize( + "exception,exit_code,custom_err", + [ + ( + pymssql_exceptions.OperationalError("Unexpected EOF from the server"), + 3, + "Intermittent database error", + ), + ( + pymssql_exceptions.OperationalError("DBPROCESS is dead or not enabled"), + 3, + "Intermittent database error", + ), + ( + pymssql_exceptions.OperationalError( + "Invalid object name 'CodedEvent_SNOMED'" + ), + 4, + "CodedEvent_SNOMED table is currently not available", + ), + (pymssql_exceptions.DataError("Database data error"), 5, "Database error"), + ( + pymssql_exceptions.InternalError("Other database internal error"), + 5, + "Database error", + ), + ( + pymssql_exceptions.DatabaseError("A plain old database error"), + 5, + "Database error", + ), + (Exception("Other non-database error exception"), None, None), + ], +) +def test_backend_exceptions(exception, exit_code, custom_err): + backend = TPPBackend() + assert backend.get_exit_status_for_exception(exception) == exit_code + + if custom_err is not None: # pragma: no cover + assert any(custom_err in note for note in exception.__notes__) + + @register_test_for(tpp.addresses) def test_addresses(select_all_tpp): results = select_all_tpp( diff --git a/tests/lib/fixtures.py b/tests/lib/fixtures.py index 2674fb9e8..b16bfc2e7 100644 --- a/tests/lib/fixtures.py +++ b/tests/lib/fixtures.py @@ -1,3 +1,14 @@ +database_operational_error_dataset_definition = """ +from ehrql import Dataset, years +from ehrql.tables.core import patients + +dataset = Dataset() +dataset.define_population(patients.date_of_birth.year >= 1900) +dataset.extended_dob = patients.date_of_birth + years(9999) + +dataset.configure_dummy_data(population_size=10) +""" + trivial_dataset_definition = """ from ehrql import Dataset from ehrql.tables.tpp import patients