Fix formatted CSV downloads when no original term column

opensafely-core · Dec 12, 2023 · e47cd09 · e47cd09
1 parent bd2df57
commit e47cd09
Show file tree

Hide file tree

Showing 2 changed files with 118 additions and 18 deletions.
diff --git a/codelists/models.py b/codelists/models.py
@@ -734,9 +734,11 @@ def formatted_table(self, fixed_headers=False, include_mapped_vmps=False):
         table_rows = self.table[1:]
         if include_mapped_vmps and self.coding_system_id == "dmd":
             # ignore include_mapped_vmps if coding system is anything other than dmd
-            table_rows = self._add_mapped_vmps_to_table_data(
+            additional_table_rows = self._get_additional_rows_for_mapped_vmps(
                 table_rows, code_header_index, term_header_index
             )
+        else:
+            additional_table_rows = []
 
         headers = ["code", "term"]
         code_header_relabelled = original_code_header != "code"
@@ -768,29 +770,40 @@ def formatted_table(self, fixed_headers=False, include_mapped_vmps=False):
         # re-write the table data with the new headers, and only the code and term columns
         # plus a duplicate code column with the original column header if required, and
         # any other original columns
-        def _csv_row(row):
+        def _csv_row(row, term_ix=None):
             csv_row = [
                 row[code_header_index],
-                row[term_header_index] if original_term_header else "",
+                row[term_ix] if term_ix is not None else "",
             ]
             if not fixed_headers:
                 if code_header_relabelled:
                     csv_row += [row[code_header_index]]
                 csv_row += [row[ix] for ix in list(other_header_ix)]
             return csv_row
 
+        # If there was no original term header, we need to treat the original table data
+        # and the additional VMP mapped data differently. The original table data will
+        # set the term to an empty string. Additional mapped VMPs will set it to the
+        # description generated by the mapping (which will be in the last column of
+        # data, if there is no original column to map it into)
         table_data = [
             headers,
-            *[_csv_row(row) for row in table_rows],
+            *[_csv_row(row, term_ix=term_header_index) for row in table_rows],
         ]
+        table_data.extend(
+            [
+                _csv_row(row, term_ix=term_header_index or -1)
+                for row in additional_table_rows
+            ],
+        )
 
         return table_data
 
-    def _add_mapped_vmps_to_table_data(self, table_rows, code_ix, term_ix):
+    def _get_additional_rows_for_mapped_vmps(self, table_rows, code_ix, term_ix):
         """
         Take a list of dm+d table rows for CSV download, plus an index identifying
-        which column to find the code and term in for each row, and append to it
-        any previous or subsequent mapped VMPs
+        which column to find the code and term in for each row, and return a list of
+        rows to be added to it with any previous or subsequent mapped VMPs
         """
         term_ix = term_ix or len(self.table[0])
         codes_dict = {row[code_ix]: row for row in table_rows}
@@ -828,10 +841,13 @@ def add_row(vmp, description, original_code):
             # first one to get the starting row
             original_code = original_code[0]
             new_row = [*codes_dict[original_code]]
+            if len(new_row) == term_ix:
+                new_row.append("")
             new_row[code_ix] = vmp
             new_row[term_ix] = description
-            table_rows.append(new_row)
+            return new_row
 
+        additional_rows = []
         # Sort the VMPs being added to ensure consistent order. This will ensure that
         # repeated CSV downloads are the same unless new mapped VMPs are added and
         # can be used to check whether updates to study codelists are required.
@@ -840,24 +856,27 @@ def add_row(vmp, description, original_code):
             # include its description as the code(s) it was superseded by
             # sort the codes so that ordering is consistent between downloads
             original_codes = sorted(previous_vmps_to_add[previous_vmp])
-            add_row(
-                previous_vmp,
-                f"VMP previous to {', '.join(original_codes)}",
-                original_codes,
+            additional_rows.append(
+                add_row(
+                    previous_vmp,
+                    f"VMP previous to {', '.join(original_codes)}",
+                    original_codes,
+                )
             )
 
         for subsequent_vmp in sorted(subsequent_vmps_to_add):
             # add the code to the table data
             # include its description as the code it supersedes
             # sort the codes so that ordering is consistent between downloads
             original_codes = sorted(subsequent_vmps_to_add[subsequent_vmp])
-            add_row(
-                subsequent_vmp,
-                f"VMP subsequent to {', '.join(original_codes)}",
-                original_codes,
+            additional_rows.append(
+                add_row(
+                    subsequent_vmp,
+                    f"VMP subsequent to {', '.join(original_codes)}",
+                    original_codes,
+                )
             )
-
-        return table_rows
+        return additional_rows
 
     def definition_csv_data_for_download(self):
         return rows_to_csv_data(present_definition_for_download(self))

diff --git a/codelists/tests/views/test_version_download.py b/codelists/tests/views/test_version_download.py
@@ -1,3 +1,5 @@
+import pytest
+
 from mappings.dmdvmpprevmap.models import Mapping
 from opencodelists.csv_utils import csv_data_to_rows
 
@@ -128,6 +130,85 @@ def test_get_with_mapped_vmps_and_original_code_column(
     ]
 
 
+@pytest.mark.parametrize(
+    "csv_data,expected",
+    [
+        (
+            "dmd_id\n10514511000001106\n10525011000001107",
+            [
+                ["code", "term", "dmd_id"],
+                ["10514511000001106", "", "10514511000001106"],
+                ["10525011000001107", "", "10525011000001107"],
+                ["999", "VMP previous to 10514511000001106", "999"],
+                ["888", "VMP subsequent to 10514511000001106", "888"],
+            ],
+        ),
+        (
+            "code\n10514511000001106\n10525011000001107",
+            [
+                ["code", "term"],
+                ["10514511000001106", ""],
+                ["10525011000001107", ""],
+                ["999", "VMP previous to 10514511000001106"],
+                ["888", "VMP subsequent to 10514511000001106"],
+            ],
+        ),
+        (
+            "dmd_id,dmd_type,bnf_code\n"
+            "10514511000001106,VMP,0301012A0AAABAB\n10525011000001107,VMP,0301012A0AAACAC",
+            [
+                ["code", "term", "dmd_id", "dmd_type", "bnf_code"],
+                [
+                    "10514511000001106",
+                    "",
+                    "10514511000001106",
+                    "VMP",
+                    "0301012A0AAABAB",
+                ],
+                [
+                    "10525011000001107",
+                    "",
+                    "10525011000001107",
+                    "VMP",
+                    "0301012A0AAACAC",
+                ],
+                [
+                    "999",
+                    "VMP previous to 10514511000001106",
+                    "999",
+                    "VMP",
+                    "0301012A0AAABAB",
+                ],
+                [
+                    "888",
+                    "VMP subsequent to 10514511000001106",
+                    "888",
+                    "VMP",
+                    "0301012A0AAABAB",
+                ],
+            ],
+        ),
+    ],
+)
+def test_get_with_mapped_vmps_no_term_column(
+    client, dmd_version_asthma_medication, csv_data, expected
+):
+    # create a previous mapping for one of the dmd codes
+    Mapping.objects.create(id="10514511000001106", vpidprev="999")
+    # create a new mapping for one of the dmd codes
+    Mapping.objects.create(id="888", vpidprev="10514511000001106")
+
+    # update CSV data
+    dmd_version_asthma_medication.csv_data = csv_data
+    dmd_version_asthma_medication.save()
+
+    rsp = client.get(dmd_version_asthma_medication.get_download_url())
+    data = rsp.content.decode("utf8")
+    # Mapped VMPs are included by default. Fixed headers are not used, so the
+    # original columns are kept, and a duplicate code column is added under the
+    # original code header when that header is not already "code"
+    assert csv_data_to_rows(data) == expected
+
+
 def test_get_with_mapped_vmps_and_fixed_headers(client, dmd_version_asthma_medication):
     # create a previous mapping for one of the dmd codes
     Mapping.objects.create(id="10514511000001106", vpidprev="999")