From d3be8fdd92d1adef9e1e0457b04689d8403f1ff8 Mon Sep 17 00:00:00 2001 From: Will Hulme <25637345+wjchulme@users.noreply.github.com> Date: Wed, 18 Dec 2024 09:41:14 +0000 Subject: [PATCH] move to dedicated folder and tidy --- analysis/{ => PRIMIS}/codelists.py | 81 +----- analysis/PRIMIS/dataset_definition.py | 48 ++++ analysis/PRIMIS/variables_function.py | 321 ++++++++++++++++++++++ analysis/dataset_definition_all_var.py | 37 --- analysis/dataset_definition_spec_var.py | 63 ----- analysis/variables_function.py | 344 ------------------------ codelists/codelists.txt | 31 +-- project.yaml | 15 +- 8 files changed, 391 insertions(+), 549 deletions(-) rename analysis/{ => PRIMIS}/codelists.py (70%) create mode 100644 analysis/PRIMIS/dataset_definition.py create mode 100644 analysis/PRIMIS/variables_function.py delete mode 100644 analysis/dataset_definition_all_var.py delete mode 100644 analysis/dataset_definition_spec_var.py delete mode 100644 analysis/variables_function.py diff --git a/analysis/codelists.py b/analysis/PRIMIS/codelists.py similarity index 70% rename from analysis/codelists.py rename to analysis/PRIMIS/codelists.py index 5568754..785d5d6 100644 --- a/analysis/codelists.py +++ b/analysis/PRIMIS/codelists.py @@ -1,33 +1,13 @@ -# Purpose: -# define codelist objects from codelist files imported by codelist.txt spec - -# Import code building blocks from cohort extractor package +# Import code building blocks from ehrql package from ehrql import codelist_from_csv - -## --VARIABLES-- -# if the variable uses a codelist then it should be added below -# after updating the codelist.txt configuration and importing the codelist - -# Ethnicity - -ethnicity_codelist5 = codelist_from_csv( - "codelists/opensafely-ethnicity-snomed-0removed.csv", - column="code", - category_column="Label_6", # it's 6 because there is an additional "6 - Not stated" but this is not represented in SNOMED, instead corresponding to no ethnicity code -) - -ethnicity_codelist16 = 
codelist_from_csv( - "codelists/opensafely-ethnicity-snomed-0removed.csv", - column="code", - category_column="Label_16", -) - ####################################################### # PRIMIS ####################################################### -#Asthma + +# Asthma + ## Asthma Diagnosis code ast = codelist_from_csv( "codelists/primis-covid19-vacc-uptake-ast.csv", @@ -65,6 +45,7 @@ ) # CKD + ## Chronic kidney disease diagnostic codes ckd_cov = codelist_from_csv( "codelists/primis-covid19-vacc-uptake-ckd_cov.csv", @@ -89,7 +70,8 @@ column="code", ) -# DB +# Diabetes + ## Diabetes diagnosis codes diab = codelist_from_csv( "codelists/primis-covid19-vacc-uptake-diab.csv", @@ -180,19 +162,21 @@ column="code", ) -# BMI +# Severe Obesity + +## BMI bmi = codelist_from_csv( "codelists/primis-covid19-vacc-uptake-bmi.csv", column="code", ) -# All BMI coded terms +## All BMI coded terms bmi_stage = codelist_from_csv( "codelists/primis-covid19-vacc-uptake-bmi_stage.csv", column="code", ) -# Severe Obesity code recorded +## Severe Obesity code recorded sev_obesity = codelist_from_csv( "codelists/primis-covid19-vacc-uptake-sev_obesity.csv", column="code", @@ -203,44 +187,3 @@ "codelists/primis-covid19-vacc-uptake-learndis.csv", column="code", ) - - -# Cancer - -cancer_haem_snomed=codelist_from_csv( - "codelists/opensafely-haematological-cancer-snomed.csv", - column="id", -) - -cancer_nonhaem_nonlung_snomed=codelist_from_csv( - "codelists/opensafely-cancer-excluding-lung-and-haematological-snomed.csv", - column="id", -) - -cancer_lung_snomed=codelist_from_csv( - "codelists/opensafely-lung-cancer-snomed.csv", - column="id", -) - -chemotherapy_radiotherapy_snomed = codelist_from_csv( - "codelists/opensafely-chemotherapy-or-radiotherapy-snomed.csv", - column = "id" -) - -cancer_nonhaem_snomed = ( - cancer_nonhaem_nonlung_snomed + - cancer_lung_snomed + - chemotherapy_radiotherapy_snomed -) - -# solid organ transplant -solid_organ_transplant=codelist_from_csv( - 
"codelists/opensafely-solid-organ-transplantation-snomed.csv", - column="id", -) - -# HIV/AIDS -hiv_aids=codelist_from_csv( - "codelists/nhsd-hiv-aids-snomed.csv", - column="code", -) \ No newline at end of file diff --git a/analysis/PRIMIS/dataset_definition.py b/analysis/PRIMIS/dataset_definition.py new file mode 100644 index 0000000..cf7cfce --- /dev/null +++ b/analysis/PRIMIS/dataset_definition.py @@ -0,0 +1,48 @@ +from ehrql import create_dataset +from ehrql.tables.tpp import patients, practice_registrations + +# import variable definitions +from variables_function import * + +#Import codelists +from codelists import * + +# initialise dataset +dataset = create_dataset() + +# Choose an index date +index_date = "2020-12-08" + +#Dummy data +dataset.configure_dummy_data(population_size=1000) + +# define dataset population +dataset.define_population( + practice_registrations.for_patient_on(index_date).exists_for_patient() & + ((patients.date_of_death> index_date) | patients.date_of_death.is_null()) +) + +# Example 1: Add specific PRIMIS variables + +dataset.immunosuppressed = is_immunosuppressed(index_date) #immunosuppress grouped +dataset.ckd = has_ckd(index_date) #chronic kidney disease +dataset.crd = has_crd(index_date) # chronis respratory disease +dataset.diabetes = has_diabetes(index_date) #diabetes +dataset.cld = has_prior_event(cld, index_date) # chronic liver disease +dataset.chd = has_prior_event(chd_cov, index_date) #chronic heart disease +dataset.cns = has_prior_event(cns_cov, index_date) # chronic neurological disease +dataset.asplenia = has_prior_event(spln_cov, index_date) # asplenia or dysfunction of the Spleen +dataset.learndis = has_prior_event(learndis, index_date) # learning Disability +dataset.smi = has_smi(index_date) #severe mental illness +dataset.severe_obesity = has_severe_obesity(index_date) #immunosuppress grouped + +# Example 2: add the single PRIMIS "at risk" variable + +dataset.primis_atrisk = primis_atrisk(index_date) # at risk (at 
least one of the conditions above) + +# EXAMPLE 3: alternatively, use the `primis_variables` function to add variables programmatically: + +for i in range(0, 2): + suffix = f"_{i}" + primis_variables(dataset = dataset, index_date = index_date+years(i), var_name_suffix = suffix) + diff --git a/analysis/PRIMIS/variables_function.py b/analysis/PRIMIS/variables_function.py new file mode 100644 index 0000000..e910ab9 --- /dev/null +++ b/analysis/PRIMIS/variables_function.py @@ -0,0 +1,321 @@ +# This study definition implements the PRIMIS specification for identifying those considered to be +# "clinically vulnerable" or "clinically extremely vulnerable". +# PRIMIS are the company responsible for creating clinical codelists and algorithms to define those eligible for COVID-19 vaccination +# as advised by The Joint Committee for Vaccination and Immunisation (JCVI) and published in Chapter 14a of the Green Book. +# The variables below are an implementation of these definitions in ehrQL. +# The current specification is based on version XXXXXX + + +# Green book: +# https://www.gov.uk/government/publications/covid-19-the-green-book-chapter-14a +# Clinical risk groups aged >16 years (16/9/2024): +# chronic respiratory disease (including asthma), +# chronic heart disease and vascular disease, +# chronic kidney disease +# chronic liver disease +# chronic neurological disease (including severe learning disability) +# diabetes mellitus and other endocrine disorders +# immunosuppression +# asplenia or dysfunction of the spleen +# morbid obesity +# severe mental illness +# x younger adults in long-stay nursing and residential care settings +# x pregnancy + + +##################################################### +# Import relevant functions and scripts +##################################################### + +from ehrql import case, when, days, years + +from codelists import * + +from ehrql.tables.core import ( + medications, + patients +) + +from ehrql.tables.tpp
import ( + clinical_events, +) + + +##################################################### +# Common functions for constructing clinical queries +##################################################### + +# events occurring before a specified date + +# query prior_events for existence of event-in-codelist, returns a patientSeries +def has_prior_event(codelist, index_date, where=True): + prior_events = clinical_events.where(clinical_events.date.is_on_or_before(index_date)) + return ( + prior_events + .where(where) + .where(prior_events.snomedct_code.is_in(codelist)) + .exists_for_patient() + ) + +# query prior_events for the most recent event-in-codelist, returns a patientFrame +def last_prior_event(codelist, index_date, where=True): + prior_events = clinical_events.where(clinical_events.date.is_on_or_before(index_date)) + return ( + prior_events.where(where) + .where(prior_events.snomedct_code.is_in(codelist)) + .sort_by(clinical_events.date) + .last_for_patient() + ) + +# meds occurring before a specified date + +# query prior_meds for existence of med-in-codelist, returns a patientSeries +def has_prior_meds(codelist, index_date, where=True): + prior_meds = medications.where(medications.date.is_on_or_before(index_date)) + return ( + prior_meds.where(where) + .where(prior_meds.dmd_code.is_in(codelist)) + .exists_for_patient() + ) + +# query prior_meds for the most recent med-in-codelist, returns a patientFrame +def last_prior_meds(codelist, index_date, where=True): + prior_meds = medications.where(medications.date.is_on_or_before(index_date)) + return ( + prior_meds.where(where) + .where(prior_meds.dmd_code.is_in(codelist)) + .sort_by(medications.date) + .last_for_patient() + ) + + +####################################################### +# PRIMIS +####################################################### + +# Asthma +def has_asthma(index_date): + # Asthma diagnosis + has_astdx = has_prior_event(ast, index_date) + # Asthma admission + has_asthadm =
has_prior_event( + astadm, + index_date, + where = clinical_events.date.is_on_or_between(index_date - years(2), index_date) + ) + # Inhaled asthma prescription in previous year + has_astrx_inhaled = has_prior_meds( + astrxm1, + index_date, + where = medications.date.is_on_or_after(index_date - years(1)) + ) + # count of systemic steroid prescriptions in past 2 years + prior_meds = medications.where(medications.date.is_on_or_before(index_date)) + count_astrx_oral = ( + prior_meds + .where(prior_meds.dmd_code.is_in(astrx)) + .where(prior_meds.date.is_on_or_between(index_date - years(2), index_date)) + .count_for_patient() + ) + # Asthma + asthma = case( + when(has_asthadm).then(True), + when(has_astdx & has_astrx_inhaled & (count_astrx_oral >= 2)).then(True), + otherwise=False + ) + return asthma + +# Chronic Kidney Disease (CKD) +def has_ckd(index_date): + # Chronic kidney disease diagnostic codes + has_ckd_cov = has_prior_event(ckd_cov, index_date) + # Chronic kidney disease codes - all stages + ckd15_date = last_prior_event(ckd15, index_date).date + # Chronic kidney disease codes - stages 3 - 5 + ckd35_date = last_prior_event(ckd35, index_date).date + # Chronic kidney disease + ckd = case( + when(has_ckd_cov).then(True), + when(ckd15_date.is_null()).then(False), + when((ckd35_date >= ckd15_date)).then(True), + otherwise=False + ) + return ckd + +# Chronic Respiratory Disease (CRD) +def has_crd(index_date, where=True): + has_resp_cov = has_prior_event(resp_cov, index_date) + has_crd = has_resp_cov | has_asthma(index_date) + return has_crd + +# Severe Obesity +def has_severe_obesity(index_date): + # Severe obesity only defined for people aged 18 and over + aged18plus = patients.age_on(index_date) >= 18 + # Last BMI stage event + date_bmi_stage = last_prior_event( + bmi_stage, + index_date + ).date + # Last severe obesity event + date_sev_obesity = last_prior_event( + sev_obesity, + index_date + ).date + # Last BMI event with a non-null numeric value + event_bmi = last_prior_event( + bmi, +
index_date, + where=(clinical_events.numeric_value.is_not_null()) + ) + # Severe obesity (only defined for ages 18+, so anyone NOT aged 18 or over is never flagged) + severe_obesity = case( + when(~aged18plus).then(False), + when( + (date_sev_obesity > event_bmi.date) | + (date_sev_obesity.is_not_null() & event_bmi.date.is_null()) + ).then(True), + when( + (event_bmi.date >= date_bmi_stage) & + (event_bmi.numeric_value >= 40.0) + ).then(True), + when( + (date_bmi_stage.is_null()) & + (event_bmi.numeric_value >= 40.0) + ).then(True), + otherwise=False + ) + return severe_obesity + +# Pregnant variable to identify gestational diabetes +def has_pregnancy(index_date): + # Pregnancy delivery code date (a delivery code between 8 and 15 months prior to index date) + pregAdel_date = last_prior_event( + pregdel, + index_date, + where=clinical_events.date.is_on_or_between(index_date - days(7 * 65), index_date - days((7 * 30) + 1)) + ).date + # Pregnancy: 8 months and 15 months (a pregnancy code between 8 and 15 months prior to index date) + pregA_date = last_prior_event( + preg, + index_date, + where=clinical_events.date.is_on_or_between(index_date - days(7 * 65), index_date - days((7 * 30) + 1)) + ).date + # Pregnancy: <8 months (a pregnancy code within 8 months prior to index date) + pregB = has_prior_event( + preg, + index_date, + where=clinical_events.date.is_on_or_between(index_date - days(7 * 30), index_date) + ) + # Pregnancy group + has_pregnancy = case( + when(pregB).then(True), + when( + pregAdel_date.is_not_null() & + pregA_date.is_not_null() & + (pregA_date > pregAdel_date) + ).then(True), + otherwise=False + ) + return has_pregnancy + +# Diabetes +def has_diabetes(index_date, where=True): + date_diab = last_prior_event(diab, index_date).date + date_dmres = last_prior_event(dmres, index_date).date + has_gdiab = has_prior_event(gdiab, index_date) + has_diab_group = has_gdiab & has_pregnancy(index_date) + has_addis = has_prior_event(addis, index_date) + # Diabetes condition + diabetes = case( + when(date_dmres < date_diab).then(True), +
when(date_diab.is_not_null() & date_dmres.is_null()).then(True), + when(has_addis).then(True), + when(has_diab_group).then(True), + otherwise=False + ) + return diabetes + +# Immunosuppression +def is_immunosuppressed(index_date): + # Immunosuppression diagnosis + has_immdx_cov = has_prior_event( + immdx_cov, + index_date + ) + # Immunosuppression medication (within the last 3 years) + has_immrx = has_prior_meds( + immrx, + index_date, + where=medications.date.is_on_or_after(index_date - years(3)) + ) + # Immunosuppression admin code recorded (within the last 3 years) + has_immadm = has_prior_event( + immadm, + index_date, + where=clinical_events.date.is_on_or_after(index_date - years(3)) + ) + # Chemotherapy code recorded as a clinical event (within the last 3 years) + has_dxt_chemo = has_prior_event( + dxt_chemo, + index_date, + where=clinical_events.date.is_on_or_after(index_date - years(3)) + ) + # Immunosuppression + immunosupp = case( + when(has_immdx_cov).then(True), + when(has_immrx).then(True), + when(has_immadm).then(True), + when(has_dxt_chemo).then(True), + otherwise=False + ) + return immunosupp + +# Severe mental illness +def has_smi(index_date, where=True): + date_sev_mental = last_prior_event(sev_mental, index_date).date + # Remission codes relating to Severe Mental Illness + date_smhres = last_prior_event(smhres, index_date).date + # Severe mental illness + smi = case( + when(date_smhres < date_sev_mental).then(True), + when(date_sev_mental.is_not_null() & date_smhres.is_null()).then(True), + otherwise=False + ) + return smi + +# At risk group +def primis_atrisk(index_date): + + # This definition excludes the following groups: + # younger adults in long-stay nursing and residential care settings + # pregnancy + + return ( + is_immunosuppressed(index_date) | # immunosuppression grouped + has_ckd(index_date) | # chronic kidney disease + has_crd(index_date) | # chronic respiratory disease + has_diabetes(index_date) | # diabetes + has_prior_event(cld, index_date) | # chronic
liver disease + has_prior_event(cns_cov, index_date) | # chronic neurological disease + has_prior_event(chd_cov, index_date) | # chronic heart disease + has_prior_event(spln_cov, index_date) | # asplenia or spleen dysfunction + has_prior_event(learndis, index_date) | # learning disability + has_smi(index_date) | # severe mental illness + has_severe_obesity(index_date) # severe obesity + ) + +## function to add every PRIMIS component column plus the combined at-risk flag to a dataset; var_name_suffix is appended to each column name +def primis_variables(dataset, index_date, var_name_suffix=""): + dataset.add_column(f"immunosuppressed{var_name_suffix}", is_immunosuppressed(index_date)) #immunosuppress grouped + dataset.add_column(f"ckd{var_name_suffix}", has_ckd(index_date)) #chronic kidney disease + dataset.add_column(f"crd{var_name_suffix}", has_crd(index_date)) #chronic respiratory disease (including asthma, matching primis_atrisk) + dataset.add_column(f"diabetes{var_name_suffix}", has_diabetes(index_date)) #diabetes + dataset.add_column(f"cld{var_name_suffix}", has_prior_event(cld, index_date)) # chronic liver disease + dataset.add_column(f"chd{var_name_suffix}", has_prior_event(chd_cov, index_date)) #chronic heart disease + dataset.add_column(f"cns{var_name_suffix}", has_prior_event(cns_cov, index_date)) # chronic neurological disease + dataset.add_column(f"asplenia{var_name_suffix}", has_prior_event(spln_cov, index_date)) # asplenia or dysfunction of the Spleen + dataset.add_column(f"learndis{var_name_suffix}", has_prior_event(learndis, index_date)) # learning Disability + dataset.add_column(f"smi{var_name_suffix}", has_smi(index_date)) #severe mental illness + dataset.add_column(f"severe_obesity{var_name_suffix}", has_severe_obesity(index_date)) # severe obesity + dataset.add_column(f"primis_atrisk{var_name_suffix}", primis_atrisk(index_date)) # at risk \ No newline at end of file diff --git a/analysis/dataset_definition_all_var.py b/analysis/dataset_definition_all_var.py deleted file mode 100644 index e47dd44..0000000 ---
a/analysis/dataset_definition_all_var.py +++ /dev/null @@ -1,37 +0,0 @@ -from ehrql import create_dataset -from ehrql.tables.tpp import patients, practice_registrations - -#import function for clinical variables -from variables_function import * - -# initialise dataset - -dataset = create_dataset() - -# Index date (choose a date) -index_date = "2020-03-31" -#Dummy data -dataset.configure_dummy_data(population_size=1000) - -#Data set definition -registered_patients = practice_registrations.for_patient_on(index_date) -registered = registered_patients.exists_for_patient() - -alive = (patients.date_of_death> index_date) | patients.date_of_death.is_null() - -# define dataset poppulation -dataset.define_population( - registered - & alive -) - -# Add groups of variables -#Demographic -demographic_variables(dataset = dataset, index_date = index_date) - -#Primis variables -primis_variables(dataset = dataset, index_date = index_date) - -#Clinical variables -other_cx_variables(dataset = dataset, index_date = index_date) - diff --git a/analysis/dataset_definition_spec_var.py b/analysis/dataset_definition_spec_var.py deleted file mode 100644 index 269b96d..0000000 --- a/analysis/dataset_definition_spec_var.py +++ /dev/null @@ -1,63 +0,0 @@ - -from ehrql import create_dataset -from ehrql.tables.tpp import patients, practice_registrations - -#import function for clinical variables -from variables_function import * - -#Import codelists -from codelists import * - -# initialise dataset - -dataset = create_dataset() - -# Index date (choose a date) -index_date = "2020-03-31" - -#Dummy data -dataset.configure_dummy_data(population_size=1000) - -#Data set definition -registered_patients = practice_registrations.for_patient_on(index_date) -registered = registered_patients.exists_for_patient() - -alive = (patients.date_of_death> index_date) | patients.date_of_death.is_null() - -# define dataset poppulation -dataset.define_population( - registered - & alive -) - - -# Add specific variables 
- -# demographic variables -dataset.age= patients.age_on(index_date) -dataset.region= registered_patients.practice_nuts1_region_name -dataset.stp= registered_patients.practice_stp -dataset.imd= addresses.for_patient_on(index_date).imd_rounded -dataset.ethn_16= last_prior_event(ethnicity_codelist16, index_date).snomedct_code.to_category(ethnicity_codelist16) -dataset.ethn_5= last_prior_event(ethnicity_codelist5, index_date).snomedct_code.to_category(ethnicity_codelist5) - -# PRIMIS variables: -dataset.crd= has_prior_event(resp_cov, index_date) #chronic respiratory disease -dataset.ast= has_asthma(index_date) #asthma -dataset.chd= has_prior_event(chd_cov, index_date) #chronic heart disease -dataset.ckd= has_ckd(index_date) #chronic kidney disease -dataset.cld= has_prior_event(cld, index_date) # chronic liver disease -dataset.cns= has_prior_event(cns_cov, index_date) # chronic neurological disease -dataset.learndis= has_prior_event(learndis, index_date) # learning Disability -dataset.diab= has_diab(index_date) #diabetes -dataset.immuno= is_immunosuppressed(index_date) #immunosuppress grouped -dataset.asplen= has_prior_event(spln_cov, index_date) # asplenia or dysfunction of the Spleen -dataset.obes= has_sev_obes(index_date) #immunosuppress grouped -dataset.sev_ment= has_sev_mental(index_date) #severe mental illness -dataset.one_primis= has_at_least_one_primis(index_date) #at least one primis - -## others cx variables of interest -dataset.sol_org_trans= has_prior_event(solid_organ_transplant, index_date) # Organs transplant -dataset.hiv= has_prior_event(hiv_aids, index_date) #HIV/AIDS -dataset.cancer= has_prior_event(cancer_nonhaem_snomed, index_date, where=clinical_events.date.is_after(index_date - days(int(3 * 365.25))))|has_prior_event(cancer_haem_snomed, index_date, where=clinical_events.date.is_after(index_date - days(int(3 * 365.25)))) - \ No newline at end of file diff --git a/analysis/variables_function.py b/analysis/variables_function.py deleted file mode 
100644 index 0cd6b98..0000000 --- a/analysis/variables_function.py +++ /dev/null @@ -1,344 +0,0 @@ -##################################################### -# Common functions for contructing clinical queries # -##################################################### - -from ehrql.codes import CTV3Code, ICD10Code - -from ehrql import case, days, when, years - -from codelists import * - -from ehrql.tables.core import ( - medications, - patients -) - -from ehrql.tables.tpp import ( - addresses, -# opa_cost, - clinical_events, - practice_registrations, -# appointments, -# vaccinations -) - - - -##################### -# Clinical functions# -##################### - -# events occurring before spec date -# prior_events = clinical_events.where(clinical_events.date.is_on_or_before(index_date)) - -# query prior_events for existence of event-in-codelist -def has_prior_event(codelist, index_date, where=True): - prior_events = clinical_events.where(clinical_events.date.is_on_or_before(index_date)) - return ( - prior_events - .where(where) - .where(prior_events.snomedct_code.is_in(codelist)) - .exists_for_patient() - ) - -# query prior_events for date of most recent event-in-codelist -def last_prior_event(codelist, index_date, where=True): - prior_events = clinical_events.where(clinical_events.date.is_on_or_before(index_date)) - return ( - prior_events.where(where) - .where(prior_events.snomedct_code.is_in(codelist)) - .sort_by(clinical_events.date) - .last_for_patient() - ) - -# query prior_events for date of earliest event-in-codelist -def first_prior_event(codelist, index_date, where=True): - prior_events = clinical_events.where(clinical_events.date.is_on_or_before(index_date)) - return ( - prior_events.where(where) - .where(prior_events.snomedct_code.is_in(codelist)) - .sort_by(clinical_events.date) - .first_for_patient() - ) - -# meds occurring before spec date - -# query prior_meds for existence of event-in-codelist -def has_prior_meds(codelist, index_date, where=True): - 
prior_meds = medications.where(medications.date.is_on_or_before(index_date)) - return ( - prior_meds.where(where) - .where(prior_meds.dmd_code.is_in(codelist)) - .exists_for_patient() - ) - -# query prior meds for date of most recent med-in-codelist -def last_prior_meds(codelist, index_date, where=True): - prior_meds = medications.where(medications.date.is_on_or_before(index_date)) - return ( - prior_meds.where(where) - .where(prior_meds.dmd_code.is_in(codelist)) - .sort_by(medications.date) - .last_for_patient() - ) - -# query prior_events for date of earliest event-in-codelist -def first_prior_meds(codelist, index_date, where=True): - prior_meds = medications.where(medications.date.is_on_or_before(index_date)) - return ( - prior_meds.where(where) - .where(prior_meds.dmd_code.is_in(codelist)) - .sort_by(medications.date) - .first_for_patient() - ) - -###################### -# Composed variables # -###################### -# Patients with immunosuppression -def is_immunosuppressed(index_date): - # Immunosuppression diagnosis - immdx = has_prior_event(immdx_cov, index_date) - # Immunosuppression medication (within the last 3 years) - immrx_cov = has_prior_meds( - immrx, - index_date, - where=medications.date.is_on_or_after(index_date - days(int(3 * 365.25))) - ) - # Immunosuppression admin date (within the last 3 years) - immadm_cov = has_prior_event( - immadm, - index_date, - where=clinical_events.date.is_on_or_after(index_date - days(int(3 * 365.25))) - ) - # Chemotherapy medication date (within the last 3 years) - dxt_chemo_cov = has_prior_event( - dxt_chemo, - index_date, - where=clinical_events.date.is_on_or_after(index_date - days(int(3 * 365.25))) - ) - # Immunosuppression - immunosupp = case( - when(immdx.is_not_null()).then(True), - when(immrx_cov.is_not_null()).then(True), - when(immadm_cov.is_not_null()).then(True), - when(dxt_chemo_cov.is_not_null()).then(True), - otherwise=False - ) - return immunosupp - -#Patients with Chronic Kidney Disease -def 
has_ckd(index_date, where=True): - # Chronic kidney disease diagnostic codes - ckd = has_prior_event(ckd_cov, index_date) - # Chronic kidney disease codes - all stages - ckd15_date = last_prior_event(ckd15, index_date).date - # Chronic kidney disease codes-stages 3 - 5 - ckd35_date = last_prior_event(ckd35, index_date).date - # Chronic kidney disease - ckd_def = case( - when(ckd).then(True), - when((ckd35_date >= ckd15_date)).then(True), - otherwise=False - ) - return ckd_def - -#Patients with asthma - -def has_asthma(index_date, where=True): - prior_meds = medications.where(medications.date.is_on_or_before(index_date)) - #Asthma diagnosis - astdx = has_prior_event( - ast, - index_date) - #Asthma admision - asthadm = has_prior_event( - astadm, - index_date, - where=clinical_events.date.is_on_or_between(index_date - days(730), index_date) - ) - # Inhaled asthma prescription in previous year - astrx_inhaled = has_prior_meds( - astrxm1, - index_date, - where=medications.date.is_on_or_after(index_date - days(365))) - # count of systemic steroid prescription inpast 2 years - astrx_oral_count = ( - prior_meds - .where(prior_meds.dmd_code.is_in(astrx)) - .where(prior_meds.date.is_on_or_between(index_date - days(730), index_date)) - .count_for_patient()) - # Asthma - asthma = case( - when(asthadm).then(True), - when(astdx & astrx_inhaled & (astrx_oral_count >= 2)).then(True), - otherwise=False - ) - return asthma - -# Patients with Morbid Obesity -#Need to include that they need to be >18yo -def has_sev_obes(index_date): - # Last BMI stage event - bmi_stage_event = last_prior_event( - bmi_stage, - index_date - ) - # Last severe obesity event (after the BMI stage event, with valid numeric value) - sev_obesity_event = last_prior_event( - sev_obesity, - index_date, - where=((clinical_events.date >= bmi_stage_event.date) & - (clinical_events.numeric_value != 0.0) - ) - ) - # Last BMI event not 0 - bmi_event = last_prior_event( - bmi, - index_date, - 
where=(clinical_events.numeric_value != 0.0) - ) - # Severe obesity - severe_obesity = case( - when(sev_obesity_event.date > bmi_event.date).then(True), - when(bmi_event.numeric_value >= 40.0).then(True), - otherwise=False - ) - return severe_obesity - - -# Pregnant variable to identify gestational diabetes -def preg_group(index_date): - # Pregnancy delivery code date - pregAdel_date = last_prior_event( - pregdel, - index_date, - where=clinical_events.date.is_on_or_between(index_date - days(7 * 65), index_date - days((7 * 30) + 1)) - ).date - # Pregnancy:8 months and 15 months - pregA_date = last_prior_event( - preg, - index_date, - where=clinical_events.date.is_on_or_between(index_date - days(7 * 65), index_date - days((7 * 30) + 1)) - ).date - # Pregnancy: <8 months - pregB = has_prior_event( - preg, - index_date, - where=clinical_events.date.is_on_or_between(index_date - days(7 * 30), index_date) - ) - # Pregnancy group - pregnancy_group = case( - when(pregB).then(True), - when( - (pregAdel_date.is_not_null() & - pregA_date.is_not_null() & - pregA_date.is_on_or_after(pregAdel_date)) - ).then(True), - otherwise=False - ) - return pregnancy_group - -#Patients with Diabetes -def has_diab(index_date, where=True): - diab_date = last_prior_event(diab, index_date).date - dmres_date = last_prior_event(dmres, index_date).date - gesdiab = has_prior_event(gdiab, index_date) - gdiab_group = gesdiab & preg_group(index_date) - addis_cov = has_prior_event(addis, index_date) - # Diabetes condition - diabetes = case( - when(dmres_date < diab_date).then(True), - when(diab_date.is_not_null() & dmres_date.is_null()).then(True), - when(addis_cov).then(True), - when(gdiab_group).then(True), - otherwise=False - ) - return diabetes - -# Severe mental illness -def has_sev_mental(index_date, where=True): - sev_mental_date = last_prior_event(sev_mental, index_date).date - # Remission codes relating to Severe Mental Illness - smhres_date = last_prior_event(smhres, index_date).date - # 
Severe mental illness - sev_mental_ill = case( - when(smhres_date < sev_mental_date).then(True), - when(sev_mental_date.is_not_null() & smhres_date.is_null()).then(True), - otherwise=False - ) - return sev_mental_ill - -# At least one primis variable -def has_at_least_one_primis(index_date): - return ( - has_prior_event(resp_cov, index_date) | # chronic respiratory disease - has_asthma(index_date) | # asthma - has_prior_event(chd_cov, index_date) | # chronic heart disease - has_ckd(index_date) | # chronic kidney disease - has_prior_event(cld, index_date) | # chronic liver disease - has_prior_event(cns_cov, index_date) | # chronic neurological disease - has_prior_event(learndis, index_date) | # learning disability - has_diab(index_date) | # diabetes - is_immunosuppressed(index_date) | # immunosuppression grouped - has_prior_event(spln_cov, index_date) | # asplenia or spleen dysfunction - has_sev_obes(index_date) | # severe obesity - has_sev_mental(index_date) # severe mental illness - ) - - -## functions to define variables across multiple study definitions - -# demographic variables -def demographic_variables(dataset, index_date, var_name_suffix=""): - registration = practice_registrations.for_patient_on(index_date) - dataset.add_column(f"age{var_name_suffix}", patients.age_on(index_date)) - dataset.add_column(f"region{var_name_suffix}", registration.practice_nuts1_region_name) - dataset.add_column(f"stp{var_name_suffix}", registration.practice_stp) - dataset.add_column(f"imd{var_name_suffix}", addresses.for_patient_on(index_date).imd_rounded) - dataset.add_column(f"ethn_16{var_name_suffix}", last_prior_event(ethnicity_codelist16, index_date).snomedct_code.to_category(ethnicity_codelist16)) - dataset.add_column(f"ethn_5{var_name_suffix}", last_prior_event(ethnicity_codelist5, index_date).snomedct_code.to_category(ethnicity_codelist5)) - -# PRIMIS variables -# Green book: -# Clinical risk groups >16 (16/9/2024): -# chronic respiratory disease (include asthma), -# 
chronic heart disease and vascular disease, -# chronic kidney disease -# chronic liver disease -# chronic neurological disease (include severe learning disability) -# diabetes mellitus and other endocrine disorders -# immunosupression -# asplenia or dysfunction of the spleen -# morbid obesity -# severe mental illness -# x young adults Younger adults in long-stay nursing and residential care settings -# x pregnancy - -def primis_variables(dataset, index_date, var_name_suffix=""): - dataset.add_column(f"crd{var_name_suffix}", has_prior_event(resp_cov, index_date)) #chronic respiratory disease - dataset.add_column(f"ast{var_name_suffix}", has_asthma(index_date)) #asthma - dataset.add_column(f"chd{var_name_suffix}", has_prior_event(chd_cov, index_date)) #chronic heart disease - dataset.add_column(f"ckd{var_name_suffix}", has_ckd(index_date)) #chronic kidney disease - dataset.add_column(f"cld{var_name_suffix}", has_prior_event(cld, index_date)) # chronic liver disease - dataset.add_column(f"cns{var_name_suffix}", has_prior_event(cns_cov, index_date)) # chronic neurological disease - dataset.add_column(f"learndis{var_name_suffix}", has_prior_event(learndis, index_date)) # learning Disability - dataset.add_column(f"diab{var_name_suffix}", has_diab(index_date)) #diabetes - dataset.add_column(f"immuno{var_name_suffix}", is_immunosuppressed(index_date)) #immunosuppress grouped - dataset.add_column(f"asplen{var_name_suffix}", has_prior_event(spln_cov, index_date)) # asplenia or dysfunction of the Spleen - dataset.add_column(f"obes{var_name_suffix}", has_sev_obes(index_date)) #immunosuppress grouped - dataset.add_column(f"sev_ment{var_name_suffix}", has_sev_mental(index_date)) #severe mental illness - dataset.add_column(f"one_primis{var_name_suffix}", has_at_least_one_primis(index_date)) #at least one primis - -# No: -# younger adults in long-stay nursing and residential care settings -# pregnancy - - ## other cx variables of interest -def other_cx_variables(dataset, 
index_date, var_name_suffix=""): - dataset.add_column(f"sol_org_trans{var_name_suffix}", has_prior_event(solid_organ_transplant, index_date)) # Organs transplant - dataset.add_column(f"hiv{var_name_suffix}", has_prior_event(hiv_aids, index_date)) #HIV/AIDS - dataset.add_column(f"cancer{var_name_suffix}", - has_prior_event(cancer_nonhaem_snomed, index_date, where=clinical_events.date.is_after(index_date - days(int(3 * 365.25))))| - has_prior_event(cancer_haem_snomed, index_date, where=clinical_events.date.is_after(index_date - days(int(3 * 365.25)))) - ) #cancer \ No newline at end of file diff --git a/codelists/codelists.txt b/codelists/codelists.txt index 36eb62f..ae04969 100644 --- a/codelists/codelists.txt +++ b/codelists/codelists.txt @@ -1,6 +1,6 @@ -#Ethnicity -opensafely/ethnicity-snomed-0removed/22911876 - +####################################################### +# PRIMIS +####################################################### #Asthma primis-covid19-vacc-uptake/astadm/6dee7f06 @@ -55,30 +55,5 @@ primis-covid19-vacc-uptake/bmi/v2.5 primis-covid19-vacc-uptake/bmi_stage/v2.5 primis-covid19-vacc-uptake/sev_obesity/v2.5 - #Learning disability primis-covid19-vacc-uptake/learndis/v2.5 - -# HIV/SIDA -nhsd/hiv-aids-snomed/68ca529e - -#Solid organ transplant -opensafely/solid-organ-transplantation-snomed/2020-04-10 - -#Cancer -#Non haem-lung cancer -opensafely/cancer-excluding-lung-and-haematological-snomed/2020-04-15 - -#Chemotherapy -opensafely/chemotherapy-or-radiotherapy-snomed/2020-04-15 - -#Lung cancer -opensafely/lung-cancer-snomed/2020-04-15 - -# Haem cancer -opensafely/haematological-cancer-snomed/2020-04-15 - -#Other -#nhsd/haematopoietic-stem-cell-transplant-snomed/3ec3ac16 -#nhsd/haematological-malignancies-snomed/31a49191 -#nhsd/sickle-spl-atriskv4-snomed-ct/7083ed37 \ No newline at end of file diff --git a/project.yaml b/project.yaml index f95583a..7daa19e 100644 --- a/project.yaml +++ b/project.yaml @@ -1,14 +1,13 @@ version: '4.0' actions: - 
generate_dataset_all_var: - run: ehrql:v1 generate-dataset analysis/dataset_definition_all_var.py --output output/dataset_all.csv.gz - outputs: - highly_sensitive: - dataset: output/dataset_all.csv.gz - generate_dataset_spec_var: - run: ehrql:v1 generate-dataset analysis/dataset_definition_spec_var.py --output output/dataset_spec.csv.gz +####################################################### +# PRIMIS +####################################################### + + generate_dataset_PRIMIS: + run: ehrql:v1 generate-dataset analysis/PRIMIS/dataset_definition.py --output output/PRIMIS/dataset.csv.gz outputs: highly_sensitive: - dataset: output/dataset_spec.csv.gz \ No newline at end of file + dataset: output/PRIMIS/dataset.csv.gz \ No newline at end of file