Skip to content

Commit

Permalink
Merge pull request #2262 from opensafely-core/evansd/dummy-data-timeout-tweaks
Browse files Browse the repository at this point in the history

Dummy data configuration tweaks
  • Loading branch information
evansd authored Nov 29, 2024
2 parents a61b4cd + 1c9f5ca commit a4cd62a
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 0 deletions.
4 changes: 4 additions & 0 deletions ehrql/dummy_data/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ def get_data(self):
f"Failed to find {self.population_size} matching patients within "
f"{self.timeout} seconds — giving up"
)
log.info(
f"Use e.g. `dataset.configure_dummy_data(timeout={self.timeout * 2})` "
f"to try for longer"
)
return data

def get_patient_id_batches(self):
Expand Down
4 changes: 4 additions & 0 deletions ehrql/dummy_data_nextgen/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ def get_data(self):
f"Failed to find {self.population_size} matching patients within "
f"{self.timeout} seconds — giving up"
)
log.info(
f"Use e.g. `dataset.configure_dummy_data(timeout={self.timeout * 2})` "
f"to try for longer"
)
return data

def get_patient_id_batches(self):
Expand Down
36 changes: 36 additions & 0 deletions tests/integration/test___main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,42 @@ class test_table(PatientFrame):
assert "is not contained within the directory" in str(e.value)


@pytest.mark.parametrize("legacy", [True, False])
def test_generate_dataset_passes_dummy_data_config(tmp_path, caplog, legacy):
    """Check that `configure_dummy_data` settings show up in the `generate-dataset` logs.

    A minimal dataset definition configuring `population_size=2` and
    `timeout=3` (plus `legacy=True` for the legacy case) is written to
    `tmp_path`, `generate-dataset` is run against it, and the captured log
    text is inspected for the configured values and the generator in use.
    """
    if legacy:
        extra_args = ", legacy=True"
    else:
        extra_args = ""

    # `dedent` strips the common leading indentation, so the definition
    # written to disk starts at column 0.
    definition_source = textwrap.dedent(
        f"""\
        from ehrql import create_dataset
        from ehrql.tables.core import patients
        dataset = create_dataset()
        dataset.define_population(patients.exists_for_patient())
        dataset.sex = patients.sex
        dataset.configure_dummy_data(population_size=2, timeout=3{extra_args})
        """
    )
    dataset_file = tmp_path / "dataset_definition.py"
    dataset_file.write_text(definition_source)
    output_file = tmp_path / "output.csv"

    argv = ["generate-dataset", str(dataset_file), "--output", str(output_file)]
    main(argv)

    captured = caplog.text
    assert "Attempting to generate 2 matching patients" in captured
    assert "timeout: 3s" in captured

    # Exactly one of the two generator announcements is expected per case.
    expected_generator_message = (
        "Using legacy dummy data generation"
        if legacy
        else "Using next generation dummy data generation"
    )
    assert expected_generator_message in captured


@pytest.mark.skipif(
not sys.platform.startswith("linux"),
reason="Subprocess isolation only works on Linux",
Expand Down

0 comments on commit a4cd62a

Please sign in to comment.