Skip to content

Commit

Permalink
Merge pull request #2328 from opensafely-core/additional-pop-constrai…
Browse files Browse the repository at this point in the history
…nts-measures

Allow measures to specify dummy data additional population constraint
  • Loading branch information
rebkwok authored Dec 19, 2024
2 parents 53c57d1 + 8bf2037 commit 784d011
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 16 deletions.
15 changes: 14 additions & 1 deletion docs/includes/generated_docs/language__measures.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ this method more than once is an error.
</div>

<div class="attr-heading" id="Measures.configure_dummy_data">
<tt><strong>configure_dummy_data</strong>(<em>population_size=10</em>, <em>legacy=False</em>, <em>timeout=60</em>)</tt>
<tt><strong>configure_dummy_data</strong>(<em>population_size=10</em>, <em>legacy=False</em>, <em>timeout=60</em>, <em>additional_population_constraint=None</em>)</tt>
<a class="headerlink" href="#Measures.configure_dummy_data" title="Permanent link">🔗</a>
</div>
<div markdown="block" class="indent">
Expand All @@ -98,6 +98,19 @@ Use legacy dummy data.
_timeout_<br>
Maximum time in seconds to spend generating dummy data.

_additional_population_constraint_<br>
An additional ehrQL query that can be used to constrain the population that will
be selected for dummy data. This is incompatible with legacy mode.

For example, if you wanted to ensure that two dates appear in a particular order in your
dummy data, you could add ``additional_population_constraint = dataset.first_date <
dataset.second_date``.

You can also combine constraints with ``&`` as normal in ehrQL.
For example, ``additional_population_constraint = patients.sex.is_in(['male', 'female']) & (
patients.age_on(some_date) < 80)`` would give you dummy data consisting of only men
and women who were under the age of 80 on some particular date.

```py
measures.configure_dummy_data(population_size=10000)
```
Expand Down
17 changes: 17 additions & 0 deletions ehrql/measures/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ def configure_dummy_data(
population_size=DummyDataConfig.population_size,
legacy=DummyDataConfig.legacy,
timeout=DummyDataConfig.timeout,
additional_population_constraint=None,
):
"""
Configure the dummy data to be generated.
Expand All @@ -308,13 +309,29 @@ def configure_dummy_data(
_timeout_<br>
Maximum time in seconds to spend generating dummy data.
_additional_population_constraint_<br>
An additional ehrQL query that can be used to constrain the population that will
be selected for dummy data. This is incompatible with legacy mode.
For example, if you wanted to ensure that two dates appear in a particular order in your
dummy data, you could add ``additional_population_constraint = dataset.first_date <
dataset.second_date``.
You can also combine constraints with ``&`` as normal in ehrQL.
e.g. ``additional_population_constraint = patients.sex.is_in(['male', 'female']) & (
patients.age_on(some_date) < 80)`` would give you dummy data consisting of only men
and women who were under the age of 80 on some particular date.
```py
measures.configure_dummy_data(population_size=10000)
```
"""
self.dummy_data_config.population_size = population_size
self.dummy_data_config.legacy = legacy
self.dummy_data_config.timeout = timeout
self.dummy_data_config.set_additional_population_constraint(
additional_population_constraint
)

def configure_disclosure_control(self, *, enabled=True):
"""
Expand Down
33 changes: 18 additions & 15 deletions ehrql/query_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,21 @@ class DummyDataConfig:
timeout: int = 60
additional_population_constraint: "qm.Series[bool] | None" = None

def set_additional_population_constraint(self, additional_population_constraint):
    """
    Validate and store the additional population constraint for dummy data.

    `additional_population_constraint` is either `None` or an ehrQL series;
    a non-None value is passed to `validate_patient_series_type` with
    `types=[bool]` before its `_qm_node` is stored on this config.

    Passing `None` clears any previously stored constraint, so that calling
    `configure_dummy_data` again without a constraint does not leave a stale
    one behind (which would otherwise also trip the legacy-mode check below
    on a later call that did not supply a constraint at all).

    Raises `ValueError` if a constraint is supplied while `self.legacy` is
    set; legacy dummy data does not support additional constraints.
    """
    if additional_population_constraint is None:
        # Reset rather than keep whatever an earlier call stored.
        self.additional_population_constraint = None
        return

    validate_patient_series_type(
        additional_population_constraint,
        types=[bool],
        context="additional population constraint",
    )
    # Store the underlying query model node, not the ehrQL wrapper object.
    self.additional_population_constraint = (
        additional_population_constraint._qm_node
    )
    if self.legacy:
        raise ValueError(
            "Cannot provide an additional population constraint in legacy mode."
        )


class Dataset:
"""
Expand Down Expand Up @@ -149,21 +164,9 @@ def configure_dummy_data(
self.dummy_data_config.population_size = population_size
self.dummy_data_config.legacy = legacy
self.dummy_data_config.timeout = timeout
if additional_population_constraint is not None:
validate_patient_series_type(
additional_population_constraint,
types=[bool],
context="additional population constraint",
)
self.dummy_data_config.additional_population_constraint = (
additional_population_constraint._qm_node
)
else:
self.dummy_data_config.additional_population_constraint = None
if legacy and additional_population_constraint is not None:
raise ValueError(
"Cannot provide an additional population constraint in legacy mode."
)
self.dummy_data_config.set_additional_population_constraint(
additional_population_constraint
)

def __setattr__(self, name, value):
if name == "population":
Expand Down

0 comments on commit 784d011

Please sign in to comment.