Skip to content

Commit

Permalink
Fix formatted CSV downloads when no original term column
Browse files Browse the repository at this point in the history
  • Loading branch information
rebkwok committed Dec 12, 2023
1 parent bd2df57 commit e47cd09
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 18 deletions.
55 changes: 37 additions & 18 deletions codelists/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,9 +734,11 @@ def formatted_table(self, fixed_headers=False, include_mapped_vmps=False):
table_rows = self.table[1:]
if include_mapped_vmps and self.coding_system_id == "dmd":
# ignore include_mapped_vmps if coding system is anything other than dmd
table_rows = self._add_mapped_vmps_to_table_data(
additional_table_rows = self._get_additional_rows_for_mapped_vmps(
table_rows, code_header_index, term_header_index
)
else:
additional_table_rows = []

headers = ["code", "term"]
code_header_relabelled = original_code_header != "code"
Expand Down Expand Up @@ -768,29 +770,40 @@ def formatted_table(self, fixed_headers=False, include_mapped_vmps=False):
# re-write the table data with the new headers, and only the code and term columns
# plus a duplicate code column with the original column header if required, and
# any other original columns
def _csv_row(row):
def _csv_row(row, term_ix=None):
csv_row = [
row[code_header_index],
row[term_header_index] if original_term_header else "",
row[term_ix] if term_ix is not None else "",
]
if not fixed_headers:
if code_header_relabelled:
csv_row += [row[code_header_index]]
csv_row += [row[ix] for ix in list(other_header_ix)]
return csv_row

# If there was no original term header, we need to treat the original table data
# and the additional VMP mapped data differently. The original table data will
# set the term to an empty string. Additional mapped VMPs will set it to the
# description generated by the mapping (which will be in the last column of
# data, if there is no original column to map it into)
table_data = [
headers,
*[_csv_row(row) for row in table_rows],
*[_csv_row(row, term_ix=term_header_index) for row in table_rows],
]
table_data.extend(
[
_csv_row(row, term_ix=term_header_index or -1)
for row in additional_table_rows
],
)

return table_data

def _add_mapped_vmps_to_table_data(self, table_rows, code_ix, term_ix):
def _get_additional_rows_for_mapped_vmps(self, table_rows, code_ix, term_ix):
"""
Take a list of dm+d table rows for CSV download, plus an index identifying
which column to find the code and term in for each row, and append to it
any previous or subsequent mapped VMPs
which column to find the code and term in for each row, and return a list of
rows to be added to it with any previous or subsequent mapped VMPs
"""
term_ix = term_ix or len(self.table[0])
codes_dict = {row[code_ix]: row for row in table_rows}
Expand Down Expand Up @@ -828,10 +841,13 @@ def add_row(vmp, description, original_code):
# first one to get the starting row
original_code = original_code[0]
new_row = [*codes_dict[original_code]]
if len(new_row) == term_ix:
new_row.append("")
new_row[code_ix] = vmp
new_row[term_ix] = description
table_rows.append(new_row)
return new_row

additional_rows = []
# Sort the VMPs being added to ensure consistent order. This will ensure that
# repeated CSV downloads are the same unless new mapped VMPs are added and
# can be used to check whether updates to study codelists are required.
Expand All @@ -840,24 +856,27 @@ def add_row(vmp, description, original_code):
# include its description as the code(s) it was superseded by
# sort the codes so that ordering is consistent between downloads
original_codes = sorted(previous_vmps_to_add[previous_vmp])
add_row(
previous_vmp,
f"VMP previous to {', '.join(original_codes)}",
original_codes,
additional_rows.append(
add_row(
previous_vmp,
f"VMP previous to {', '.join(original_codes)}",
original_codes,
)
)

for subsequent_vmp in sorted(subsequent_vmps_to_add):
# add the code to the table data
# include its description as the code it supersedes
# sort the codes so that ordering is consistent between downloads
original_codes = sorted(subsequent_vmps_to_add[subsequent_vmp])
add_row(
subsequent_vmp,
f"VMP subsequent to {', '.join(original_codes)}",
original_codes,
additional_rows.append(
add_row(
subsequent_vmp,
f"VMP subsequent to {', '.join(original_codes)}",
original_codes,
)
)

return table_rows
return additional_rows

def definition_csv_data_for_download(self):
    """
    Return the definition of this object as CSV data for download.

    The value is whatever rows_to_csv_data produces from the output of
    present_definition_for_download(self).
    """
    # NOTE(review): presumably a sequence of rows, judging by the name of
    # the consumer below -- confirm against present_definition_for_download.
    definition_rows = present_definition_for_download(self)
    return rows_to_csv_data(definition_rows)
Expand Down
81 changes: 81 additions & 0 deletions codelists/tests/views/test_version_download.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

from mappings.dmdvmpprevmap.models import Mapping
from opencodelists.csv_utils import csv_data_to_rows

Expand Down Expand Up @@ -128,6 +130,85 @@ def test_get_with_mapped_vmps_and_original_code_column(
]


@pytest.mark.parametrize(
    "csv_data,expected",
    [
        # Single code column whose header is not "code": the original header
        # is kept as a duplicate code column after the "code"/"term" pair.
        (
            "dmd_id\n10514511000001106\n10525011000001107",
            [
                ["code", "term", "dmd_id"],
                ["10514511000001106", "", "10514511000001106"],
                ["10525011000001107", "", "10525011000001107"],
                ["999", "VMP previous to 10514511000001106", "999"],
                ["888", "VMP subsequent to 10514511000001106", "888"],
            ],
        ),
        # Single code column already headed "code": no duplicate column.
        (
            "code\n10514511000001106\n10525011000001107",
            [
                ["code", "term"],
                ["10514511000001106", ""],
                ["10525011000001107", ""],
                ["999", "VMP previous to 10514511000001106"],
                ["888", "VMP subsequent to 10514511000001106"],
            ],
        ),
        # Code column plus other columns, still no term column: mapped VMP
        # rows carry over the other column values of the code they map to.
        (
            "dmd_id,dmd_type,bnf_code\n"
            "10514511000001106,VMP,0301012A0AAABAB\n10525011000001107,VMP,0301012A0AAACAC",
            [
                ["code", "term", "dmd_id", "dmd_type", "bnf_code"],
                [
                    "10514511000001106",
                    "",
                    "10514511000001106",
                    "VMP",
                    "0301012A0AAABAB",
                ],
                [
                    "10525011000001107",
                    "",
                    "10525011000001107",
                    "VMP",
                    "0301012A0AAACAC",
                ],
                [
                    "999",
                    "VMP previous to 10514511000001106",
                    "999",
                    "VMP",
                    "0301012A0AAABAB",
                ],
                [
                    "888",
                    "VMP subsequent to 10514511000001106",
                    "888",
                    "VMP",
                    "0301012A0AAABAB",
                ],
            ],
        ),
    ],
)
def test_get_with_mapped_vmps_no_term_column(
    client, dmd_version_asthma_medication, csv_data, expected
):
    """
    Download a dm+d version whose CSV data has no term column.

    Rows from the original data are expected to have an empty term, while
    rows added for mapped VMPs are expected to have the generated
    "VMP previous to ..." / "VMP subsequent to ..." description as the term.
    """
    # 999 is registered as the previous VMP for one of the codes in the CSV...
    Mapping.objects.create(id="10514511000001106", vpidprev="999")
    # ...and 888 as a VMP that has that same code as its previous VMP
    Mapping.objects.create(id="888", vpidprev="10514511000001106")

    # Swap the fixture's CSV data for the parametrized data
    version = dmd_version_asthma_medication
    version.csv_data = csv_data
    version.save()

    # The plain download URL is requested; the expected rows include the
    # mapped VMPs and, where the original code header was not "code", an
    # extra column under that original header
    response = client.get(version.get_download_url())
    downloaded_rows = csv_data_to_rows(response.content.decode("utf8"))
    assert downloaded_rows == expected


def test_get_with_mapped_vmps_and_fixed_headers(client, dmd_version_asthma_medication):
# create a previous mapping for one of the dmd codes
Mapping.objects.create(id="10514511000001106", vpidprev="999")
Expand Down

0 comments on commit e47cd09

Please sign in to comment.