diff --git a/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv b/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv
index 0ee403abb0..f3589757c7 100644
--- a/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv
+++ b/resources/healthsystem/consumables/ResourceFile_Consumables_Items_and_Packages.csv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4106c2e3ae068d40b115857885b673bec3e1114be5183c0a4ae0366560e2a5c9
-size 249391
+oid sha256:596a1bc8d570f341da180fea6db1836c181f6a2a984a9c7f9b4990b78df8e689
+size 215244
diff --git a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv
index 25249531b2..43d8a7b653 100644
--- a/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv
+++ b/resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c358a643e4def0e574b75f89f83d77f9c3366f668422e005150f4d69ebe8d7a7
-size 6169152
+oid sha256:daa5490827d6857323fc837f928b8d983444d35489c8db7512191833a456d483
+size 10086795
diff --git a/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv b/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv
index 7ab675ecba..ce28143182 100644
--- a/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv
+++ b/resources/healthsystem/consumables/ResourceFile_consumables_matched.csv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5b0f417681cbdd2489e2f9c6634b2825c32beb9637dc045b56e308c910a102c
-size 90569
+oid sha256:3b1e2cdb4905e48b6ca1340376afb0604a593240cad3d6a2931c28d11fe438b7
+size 58088
diff --git a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/clean_fac_locations.py b/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/clean_fac_locations.py
deleted file mode 100644
index 3dcd4fe56e..0000000000
--- a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/clean_fac_locations.py
+++ /dev/null
@@ -1,360 +0,0 @@
-"""
-This script generates GIS data on facilities:
-
-Outputs:
-* ResourceFile_Facility_locations.csv
-* facility_distances.csv
-
-The following variables are added to the dataset generated by consumables_availability_estimation.py:
-1. facility GIS coordinates
-2. Distance and drive time to the corresponding District Health Office
-3. Distance and drive time to the corresponding Regional Medical Store (warehouse)
-
-Inputs:
-Dropbox location - ~05 - Resources/Module-healthsystem/consumables raw files/gis_data/LMISFacilityLocations_raw.xlsx
-
-NB. The contents of this file are commented out because the script requires dependencies that are not included in the
-TLO framework at the time of writing.
-"""
-
-
-"""
-import datetime
-from pathlib import Path
-
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-import requests
-import googlemaps as gmaps
-from matplotlib.lines import Line2D
-
-# Path to TLO directory
-outputfilepath = Path("./outputs")
-resourcefilepath = Path("./resources")
-path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables"
-
-# Set local Dropbox source
-path_to_dropbox = Path(  # <-- point to the TLO dropbox locally
-    'C:/Users/sm2511/Dropbox/Thanzi la Onse'
-    # '/Users/tbh03/Dropbox (SPH Imperial College)/Thanzi la Onse Theme 1 SHARE'
-)
-
-path_to_files_in_the_tlo_dropbox = path_to_dropbox / "05 - Resources/Module-healthsystem/consumables raw files/"
-
-# define a timestamp for script outputs
-timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M")
-
-# print the start time of the script
-print('Script Start', datetime.datetime.now().strftime('%H:%M'))
-
-# Use the googlemaps package to obtain GIS coordinates from facility names
-GCODE_URL = 'https://maps.googleapis.com/maps/api/geocode/json?'
-GCODE_KEY = ''  # Placeholder for the Google Maps API key
-# gmaps = gmaps.Client(key=GCODE_KEY)
-
-# 1. Clean Master Health Facility Registry (MHFR) data
-######################################################################
-# Clean locations for facilities for which GIS data was not available or incorrect in the MHFR
-# --- 1.1 Load and set up data --- #
-fac_gis = pd.read_excel(open(path_to_files_in_the_tlo_dropbox / 'gis_data/LMISFacilityLocations_raw.xlsx',
-                             'rb'), sheet_name='final_gis_data')
-fac_gis = fac_gis.rename(
-    columns={'LMIS Facility List': 'fac_name', 'OWNERSHIP': 'fac_owner', 'TYPE': 'fac_type', 'STATUS': 'fac_status',
-             'ZONE': 'zone', 'DISTRICT': 'district', 'DATE OPENED': 'open_date', 'LATITUDE': 'lat',
-             'LONGITUDE': 'long'})
-
-# Create a new column providing the source of GIS data
-fac_gis['gis_source'] = ""
-
-# Store unique district names
-districts = fac_gis['district'].unique()
-
-# Preserve rows with missing or incorrect location data in order to derive GIS data using the googlemaps API
-cond1 = fac_gis['lat'] > -8.5
-cond2 = fac_gis['lat'] < -17.5
-cond3 = fac_gis['long'] > 36.5
-cond4 = fac_gis['long'] < 32.5
-conda = cond1 | cond2 | cond3 | cond4  # outside Malawi's boundaries
-fac_gis_noloc = fac_gis[fac_gis.lat.isna() | conda]
-fac_gis_noloc = fac_gis_noloc.reset_index()
-fac_gis_noloc = fac_gis_noloc.drop(columns='index')
-
-# Edit data source
-cond_originalmhfr = fac_gis.lat.notna() & ~conda
-fac_gis.loc[cond_originalmhfr, 'gis_source'] = 'Master Health Facility Registry'
-cond_manual = fac_gis['manual_entry'].notna()
-fac_gis.loc[cond_manual, 'gis_source'] = 'Manual google search'
-
-fac_gis_clean = fac_gis[~conda & fac_gis.lat.notna()]  # save clean portion of raw data to be appended later
-
-
-# --- 1.2 Geocode facilities with missing data --- #
-# Define a function to geocode locations based on names
-def reverse_gcode(location):
-    location = str(location).replace(' ', '+')
-    nav_req = 'address={}&key={}'.format(location, GCODE_KEY)
-    request = GCODE_URL + nav_req
-    result = requests.get(request)
-    data = result.json()
-    status = data['status']
-
-    geo_location = {}
-    if str(status) == "OK":
-        sizeofjson = len(data['results'][0]['address_components'])
-        for i in range(sizeofjson):
-            sizeoftype = len(data['results'][0]['address_components'][i]['types'])
-            if sizeoftype == 3:
-                geo_location[data['results'][0]['address_components'][i]['types'][2]] = \
-                    data['results'][0]['address_components'][i]['long_name']
-
-            else:
-                if data['results'][0]['address_components'][i]['types'][0] == 'administrative_area_level_1':
-                    geo_location['state'] = data['results'][0]['address_components'][i]['long_name']
-
-                elif data['results'][0]['address_components'][i]['types'][0] == 'administrative_area_level_2':
-                    geo_location['city'] = data['results'][0]['address_components'][i]['long_name']
-                    geo_location['town'] = geo_location['city']
-
-                else:
-                    geo_location[data['results'][0]['address_components'][i]['types'][0]] = \
-                        data['results'][0]['address_components'][i]['long_name']
-
-        geo_location['lat'] = data['results'][0]['geometry']['location']['lat']
-        geo_location['lang'] = data['results'][0]['geometry']['location']['lng']
-        geo_location['formatted_address'] = data['results'][0]['formatted_address']
-
-    return geo_location
-
-
-# Extract latitude, longitude and city based on facility name
-for i in range(len(fac_gis_noloc)):
-    try:
-        print("Processing facility", fac_gis_noloc['fac_name'][i])
-        geo_info = reverse_gcode(fac_gis_noloc['fac_name'][i] + ' Malawi')
-        fac_gis_noloc.loc[i, 'lat'] = geo_info['lat']
-        fac_gis_noloc.loc[i, 'long'] = geo_info['lang']
-        fac_gis_noloc.loc[i, 'gis_source'] = 'Google maps geolocation'
-        fac_gis_noloc.loc[i, 'district'] = geo_info['city']
-    except (ValueError, TypeError, KeyError):
-        pass
-
-# Drop incorrect GIS coordinates from the above generated dataset
-conda = fac_gis_noloc.district.isin(districts)  # district is one of the known Malawi districts
-cond1 = fac_gis_noloc['lat'] > -8.5
-cond2 = fac_gis_noloc['lat'] < -17.5
-cond3 = fac_gis_noloc['long'] > 36.5
-cond4 = fac_gis_noloc['long'] < 32.5
-condb = cond1 | cond2 | cond3 | cond4  # outside Malawi's boundaries
-fac_gis_noloc.loc[~conda | condb, 'lat'] = np.nan
-fac_gis_noloc.loc[~conda | condb, 'long'] = np.nan
-fac_gis_noloc.loc[~conda | condb, 'district'] = np.nan
-
-cond = fac_gis_noloc.gis_source.isna()
-fac_gis_noloc.loc[cond, 'lat'] = np.nan
-fac_gis_noloc.loc[cond, 'long'] = np.nan
-
-# Append newly generated GIS information to the raw data
-fac_gis = pd.concat([fac_gis_noloc, fac_gis_clean], ignore_index=True)
-
-# Drop incorrect GIS coordinates based on later comparison with district data from LMIS
-list_of_incorrect_locations = ['Bilal Clinic', 'Biliwiri Health Centre', 'Chilonga Health care Health Centre',
-                               'Diamphwi Health Centre', 'Matope Health Centre (CHAM)', 'Nambazo Health Centre',
-                               'Nkhwayi Health Centre', 'Nsambe Health Centre (CHAM)', 'Padley Pio Health Centre',
-                               'Phanga Health Centre', 'Somba Clinic', "St. Martin's Molere Health Centre CHAM",
-                               'Ngapani Clinic', 'Mulungu Alinafe Clinic', 'Mdeza Health Centre',
-                               'Matandani Health Centre (CHAM)',
-                               'Sunrise Clinic', 'Sucoma Clinic']
-mapped_to_malawi = fac_gis.lat == -13.254308
-cond = fac_gis.fac_name.isin(list_of_incorrect_locations) | mapped_to_malawi
-fac_gis.loc[cond, 'lat'] = np.nan
-fac_gis.loc[cond, 'long'] = np.nan
-fac_gis.loc[cond, 'gis_source'] = np.nan
-fac_gis.loc[cond, 'district'] = np.nan
-
-# 2. Clean data using information from LMIS #
-#####################################################################################################
-# --- 2.1 Load and set up LMIS data --- #
-stkout_df = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv",
-                        low_memory=False)
-
-# Drop rows which can't be used in regression analysis
-regsubset_cond1 = stkout_df['data_source'] == 'original_lmis_data'
-regsubset_cond2 = stkout_df['fac_type_tlo'] == 'Facility_level_0'  # only one facility from Mchinji reported in OpenLMIS
-stkout_df_reg = stkout_df[regsubset_cond1 & ~regsubset_cond2]
-
-# Clean some district names to match the Master Health Facility Registry
-rename_districts = {
-    'Nkhota Kota': 'Nkhotakota',
-    'Nkhata bay': 'Nkhata Bay'
-}
-stkout_df['district'] = stkout_df['district'].replace(rename_districts)
-
-# Keep only relevant columns
-lmis_district = stkout_df[['fac_name', 'fac_type_tlo', 'district']]
-lmis_district = lmis_district.drop_duplicates()
-
-# --- 2.2 Clean district column and assign relevant DHO to each facility --- #
-# Manual fixes before assigning DHO
-# The Master Health Facility Registry did not differentiate between Mzimba North and Mzimba South --> get this data
-# and any other district discrepancies from LMIS
-fac_gis = fac_gis.rename(columns={'district': 'district_mhfr'})
-fac_gis = pd.merge(fac_gis, lmis_district, how='left', on='fac_name')
-
-list_mhfr_district_is_correct = ['Chididi Health Centre', 'Chikowa Health Centre',
-                                 'Chileka Health Centre']
-cond_mhfr_district_is_correct = fac_gis.fac_name.isin(list_mhfr_district_is_correct)
-cond_lmis_district_missing = fac_gis.district.isna()
-fac_gis.loc[cond_mhfr_district_is_correct | cond_lmis_district_missing, 'district'] = fac_gis.district_mhfr
-fac_gis = fac_gis.drop(columns=['zone', 'district_mhfr', 'open_date', 'manual_entry'])
-
-# --- 2.3 Extract final file with GIS locations into .csv --- #
-fac_gis = fac_gis[fac_gis['lat'].notna()]  # Keep rows with GIS locations
-fac_gis.to_csv(path_for_new_resourcefiles / "ResourceFile_Facility_locations.csv")
-
-# Locate the corresponding DHO for each facility
-cond1 = fac_gis['fac_name'].str.contains('DHO')
-cond2 = fac_gis['fac_name'].str.contains('istrict')
-# Create columns indicating the coordinates of the corresponding DHO for each facility
-dho_df = fac_gis[cond1 | cond2].reset_index()
-# Rename columns
-dho_df = dho_df.rename(columns={'lat': 'lat_dh', 'long': 'long_dh'})
-
-# Merge main GIS dataframe with corresponding DHO
-fac_gis = pd.merge(fac_gis, dho_df[['district', 'lat_dh', 'long_dh']], how='left', on='district')
-
-# --- 2.4 Assign relevant CMST Regional Medical Store to each facility --- #
-# Create columns indicating the coordinates of the corresponding CMST warehouse (regional medical store) for each
-# facility
-fac_gis['lat_rms'] = np.nan
-fac_gis['long_rms'] = np.nan
-fac_gis['rms'] = np.nan
-
-# RMS Center (-13.980394, 33.783521)
-cond_center1 = fac_gis['district'].isin(['Kasungu', 'Ntchisi', 'Dowa', 'Mchinji', 'Lilongwe', 'Ntcheu',
-                                         'Dedza', 'Nkhotakota', 'Salima'])
-cond_center2 = fac_gis['fac_name'].str.contains('Kamuzu Central Hospital')
-fac_gis.loc[cond_center1 | cond_center2, 'lat_rms'] = -13.980394
-fac_gis.loc[cond_center1 | cond_center2, 'long_rms'] = 33.783521
-fac_gis.loc[cond_center1 | cond_center2, 'rms'] = 'RMS Center'
-
-# RMS North (-11.425590, 33.997467)
-cond_north1 = fac_gis['district'].isin(['Nkhata Bay', 'Rumphi', 'Chitipa', 'Likoma', 'Karonga',
-                                        'Mzimba North', 'Mzimba South'])
-cond_north2 = fac_gis['fac_name'].str.contains('Mzuzu Central Hospital')
-fac_gis.loc[cond_north1 | cond_north2, 'lat_rms'] = -11.425590
-fac_gis.loc[cond_north1 | cond_north2, 'long_rms'] = 33.997467
-fac_gis.loc[cond_north1 | cond_north2, 'rms'] = 'RMS North'
-
-# RMS South (-15.804544, 35.021192)
-cond_south1 = fac_gis['district'].isin(['Blantyre', 'Balaka', 'Machinga', 'Zomba', 'Mangochi', 'Thyolo', 'Nsanje',
-                                        'Chikwawa', 'Mwanza', 'Neno', 'Mulanje', 'Phalombe', 'Chiradzulu'])
-cond_south2 = fac_gis['fac_name'].str.contains('Queen Elizabeth Central')
-cond_south3 = fac_gis['fac_name'].str.contains('Zomba Central')
-cond_south4 = fac_gis['fac_name'].str.contains('Zomba Mental')
-fac_gis.loc[cond_south1 | cond_south2 | cond_south3 | cond_south4, 'lat_rms'] = -15.804544
-fac_gis.loc[cond_south1 | cond_south2 | cond_south3 | cond_south4, 'long_rms'] = 35.021192
-fac_gis.loc[cond_south1 | cond_south2 | cond_south3 | cond_south4, 'rms'] = 'RMS South'
-
-# 3. Generate data on distance and travel time between facilities and DHO/RMS #
-#####################################################################################################
-# --- 3.1 Distance and travel time of each facility from the corresponding DHO --- #
-fac_gis['dist_todh'] = np.nan
-fac_gis['drivetime_todh'] = np.nan
-for i in range(len(fac_gis)):
-    try:
-        # print("Processing facility", i)
-        latfac = fac_gis['lat'][i]
-        longfac = fac_gis['long'][i]
-        latdho = fac_gis['lat_dh'][i]
-        longdho = fac_gis['long_dh'][i]
-        origin = (latdho, longdho)
-        dest = (latfac, longfac)
-
-        fac_gis.loc[i, 'dist_todh'] = \
-            gmaps.distance_matrix(origin, dest, mode='driving')['rows'][0]['elements'][0]['distance']['value']
-        fac_gis.loc[i, 'drivetime_todh'] = \
-            gmaps.distance_matrix(origin, dest, mode='driving')['rows'][0]['elements'][0]['duration']['value']
-    except Exception:
-        pass
-
-# --- 3.2 Distance and travel time of each facility from the corresponding RMS --- #
-fac_gis['dist_torms'] = np.nan
-fac_gis['drivetime_torms'] = np.nan
-for i in range(len(fac_gis)):
-    try:
-        # print("Processing facility", i)
-        latfac = fac_gis['lat'][i]
-        longfac = fac_gis['long'][i]
-        latrms = fac_gis['lat_rms'][i]
-        longrms = fac_gis['long_rms'][i]
-        origin = (latrms, longrms)
-        dest = (latfac, longfac)
-
-        fac_gis.loc[i, 'dist_torms'] = \
-            gmaps.distance_matrix(origin, dest, mode='driving')['rows'][0]['elements'][0]['distance']['value']
-        fac_gis.loc[i, 'drivetime_torms'] = \
-            gmaps.distance_matrix(origin, dest, mode='driving')['rows'][0]['elements'][0]['duration']['value']
-    except Exception:
-        pass
-
-# Update distance values from DH to 0 for levels 2 and above
-cond1 = fac_gis['fac_type_tlo'] == 'Facility_level_2'
-cond2 = fac_gis['fac_type_tlo'] == 'Facility_level_3'
-cond3 = fac_gis['fac_type_tlo'] == 'Facility_level_4'
-fac_gis.loc[cond1 | cond2 | cond3, 'dist_todh'] = 0
-fac_gis.loc[cond1 | cond2 | cond3, 'drivetime_todh'] = 0
-
-# 4. Save data to be merged into the consumable availability dataset for regression analysis #
-#####################################################################################################
-# Keep only necessary columns and save as .csv
-fac_gis = fac_gis[['district', 'rms', 'lat', 'long', 'lat_dh', 'long_dh', 'lat_rms', 'long_rms',
-                   'dist_torms', 'drivetime_torms', 'dist_todh', 'drivetime_todh', 'fac_name', 'gis_source']]
-
-# Export distances file to Dropbox #
-fac_gis.to_csv(path_to_files_in_the_tlo_dropbox / 'gis_data/facility_distances.csv')
-
-# 5. Descriptive graphs #
-#####################################################################################################
-groups = fac_gis.groupby('district')
-
-# Scatterplot of distance and drive time to DHO
-fig, ax = plt.subplots()
-ax.margins(0.05)  # Optional, just adds 5% padding to the autoscaling
-for name, group in groups:
-    ax.plot(group.dist_todh / 1000, group.drivetime_todh, marker='o', linestyle='', ms=5, label=name)
-# Shrink current axis by 20% to fit legend
-box = ax.get_position()
-ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
-# Put a legend to the right of the current axis
-ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
-plt.xlabel("Distance (kilometers)", fontsize=12)
-plt.ylabel("Drive time (minutes)", fontsize=12)
-plt.savefig('C:/Users/sm2511/OneDrive - University of York/Desktop/faclocation_wrtdh_new.png')
-
-# Scatterplot of distance and drive time to RMS
-groups = fac_gis.groupby('rms')
-fig, ax = plt.subplots()
-ax.margins(0.05)  # Optional, just adds 5% padding to the autoscaling
-for name, group in groups:
-    ax.plot(group.dist_torms / 1000, group.drivetime_torms, marker='o', linestyle='', ms=5, label=name)
-# Shrink current axis by 20% to fit legend
-box = ax.get_position()
-ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
-# Put a legend to the right of the current axis
-ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
-plt.xlabel("Distance (kilometers)", fontsize=12)
-plt.ylabel("Drive time (minutes)", fontsize=12)
-plt.savefig('C:/Users/sm2511/OneDrive - University of York/Desktop/faclocation_wrtrms.png')
-"""
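The deleted script leaned on the googlemaps client for distances and drive times, the kind of dependency its docstring flags as unavailable in the TLO framework. For straight-line distances only, the haversine formula needs nothing beyond the standard library; a minimal sketch (illustrative only, not part of this changeset), reusing two RMS coordinates hard-coded in the script above:

import math

def haversine_km(lat1, lon1, lat2, lon2):
    # Great-circle distance in km, assuming a spherical Earth of radius 6371 km.
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlam = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2
    return 2 * 6371 * math.asin(math.sqrt(a))

# RMS Center to RMS North, per the coordinates used in the deleted script
print(round(haversine_km(-13.980394, 33.783521, -11.425590, 33.997467), 1), 'km')

Unlike the distance-matrix output, this ignores the road network, so it understates real travel distance; it is only a dependency-free fallback.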
diff --git a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py b/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py
index 3615afd400..d3a58ee15a 100644
--- a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py
+++ b/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/consumables_availability_estimation.py
@@ -30,14 +30,12 @@ from tlo.methods.consumables import check_format_of_consumables_file
 
-# Set local Dropbox source
-path_to_dropbox = Path(  # <-- point to the TLO dropbox locally
-    '/Users/sm2511/Dropbox/Thanzi la Onse'
-    # '/Users/sejjj49/Dropbox/Thanzi la Onse'
-    # 'C:/Users/tmangal/Dropbox/Thanzi la Onse'
+# Set local shared folder source
+path_to_share = Path(  # <-- point to the shared folder
+    '/Users/sm2511/Library/CloudStorage/OneDrive-SharedLibraries-ImperialCollegeLondon/TLOModel - WP - Documents/'
 )
 
-path_to_files_in_the_tlo_dropbox = path_to_dropbox / "05 - Resources/Module-healthsystem/consumables raw files/"
+path_to_files_in_the_tlo_shared_drive = path_to_share / "07 - Data/Consumables data/"
 
 # define a timestamp for script outputs
 timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M")
@@ -68,7 +66,7 @@ def change_colnames(df, NameChangeList):  # Change column names
 
 #########################################################################################
 # Import 2018 data
-lmis_df = pd.read_csv(path_to_files_in_the_tlo_dropbox / 'ResourceFile_LMIS_2018.csv', low_memory=False)
+lmis_df = pd.read_csv(path_to_files_in_the_tlo_shared_drive / 'OpenLMIS/2018/ResourceFile_LMIS_2018.csv', low_memory=False)
 
 # 1. BASIC CLEANING ##
 # Rename columns
@@ -515,7 +513,7 @@ def custom_agg_stkout(x):
 unmatched_consumables = unmatched_consumables[unmatched_consumables['item_y'].isna()]
 
 # ** Extract stock availability data from HHFA and clean data **
-hhfa_df = pd.read_excel(path_to_files_in_the_tlo_dropbox / 'ResourceFile_hhfa_consumables.xlsx', sheet_name='hhfa_data')
+hhfa_df = pd.read_excel(path_to_files_in_the_tlo_shared_drive / 'ResourceFile_hhfa_consumables.xlsx', sheet_name='hhfa_data')
 
 # Use the ratio of availability rates between levels 1b on one hand and levels 2 and 3 on the other to extrapolate
 # availability rates for levels 2 and 3 from the HHFA data
@@ -541,7 +539,7 @@ hhfa_df.loc[cond, var] = 1
 
 # Add further assumptions on consumable availability from other sources
-assumptions_df = pd.read_excel(open(path_to_files_in_the_tlo_dropbox / 'ResourceFile_hhfa_consumables.xlsx', 'rb'),
+assumptions_df = pd.read_excel(open(path_to_files_in_the_tlo_shared_drive / 'ResourceFile_hhfa_consumables.xlsx', 'rb'),
                                sheet_name='availability_assumptions')
 assumptions_df = assumptions_df[['item_code', 'available_prop_Facility_level_0',
                                  'available_prop_Facility_level_1a', 'available_prop_Facility_level_1b',
@@ -606,35 +604,57 @@ stkout_df = pd.concat([stkout_df, hhfa_fac0], axis=0, ignore_index=True)
 
 # --- 6.4 Generate new category variable for analysis --- #
-stkout_df['category'] = stkout_df['module_name'].str.lower()
-cond_RH = (stkout_df['category'].str.contains('care_of_women_during_pregnancy')) | \
-          (stkout_df['category'].str.contains('labour'))
-cond_newborn = (stkout_df['category'].str.contains('newborn'))
-cond_childhood = (stkout_df['category'] == 'acute lower respiratory infections') | \
-                 (stkout_df['category'] == 'measles') | \
-                 (stkout_df['category'] == 'diarrhoea')
-cond_rti = stkout_df['category'] == 'road traffic injuries'
-cond_cancer = stkout_df['category'].str.contains('cancer')
-cond_ncds = (stkout_df['category'] == 'epilepsy') | \
-            (stkout_df['category'] == 'depression')
-stkout_df.loc[cond_RH, 'category'] = 'reproductive_health'
-stkout_df.loc[cond_cancer, 'category'] = 'cancer'
-stkout_df.loc[cond_newborn, 'category'] = 'neonatal_health'
-stkout_df.loc[cond_childhood, 'category'] = 'other_childhood_illnesses'
-stkout_df.loc[cond_rti, 'category'] = 'road_traffic_injuries'
-stkout_df.loc[cond_ncds, 'category'] = 'ncds'
-
-cond_condom = stkout_df['item_code'] == 2
-stkout_df.loc[cond_condom, 'category'] = 'contraception'
-
-# Create a general consumables category
-general_cons_list = [300, 33, 57, 58, 141, 5, 6, 10, 21, 23, 127, 24, 80, 93, 144, 149, 154, 40, 67, 73, 76,
-                     82, 101, 103, 88, 126, 135, 71, 98, 171, 133, 134, 244, 247]
-diagnostics_cons_list = [41, 50, 128, 216, 2008, 47, 190, 191, 196, 206, 207, 163, 175, 184,
-                         187]  # for now these have not been applied because most diagnostics are program specific
-
-cond_general = stkout_df['item_code'].isin(general_cons_list)
-stkout_df.loc[cond_general, 'category'] = 'general'
+def recategorize_modules_into_consumable_categories(_df):
+    _df['item_category'] = _df['module_name'].str.lower()
+    cond_RH = (_df['item_category'].str.contains('care_of_women_during_pregnancy')) | \
+              (_df['item_category'].str.contains('labour'))
+    cond_newborn = (_df['item_category'].str.contains('newborn'))
+    cond_newborn[cond_newborn.isna()] = False
+    cond_childhood = (_df['item_category'] == 'acute lower respiratory infections') | \
+                     (_df['item_category'] == 'measles') | \
+                     (_df['item_category'] == 'diarrhoea')
+    cond_rti = _df['item_category'] == 'road traffic injuries'
+    cond_cancer = _df['item_category'].str.contains('cancer')
+    cond_cancer[cond_cancer.isna()] = False
+    cond_ncds = (_df['item_category'] == 'epilepsy') | \
+                (_df['item_category'] == 'depression')
+    _df.loc[cond_RH, 'item_category'] = 'reproductive_health'
+    _df.loc[cond_cancer, 'item_category'] = 'cancer'
+    _df.loc[cond_newborn, 'item_category'] = 'neonatal_health'
+    _df.loc[cond_childhood, 'item_category'] = 'other_childhood_illnesses'
+    _df.loc[cond_rti, 'item_category'] = 'road_traffic_injuries'
+    _df.loc[cond_ncds, 'item_category'] = 'ncds'
+    cond_condom = _df['item_code'] == 2
+    _df.loc[cond_condom, 'item_category'] = 'contraception'
+
+    # Create a general consumables category
+    general_cons_list = [300, 33, 57, 58, 141, 5, 6, 10, 21, 23, 127, 24, 80, 93, 144, 149, 154, 40, 67, 73, 76,
+                         82, 101, 103, 88, 126, 135, 71, 98, 171, 133, 134, 244, 247, 49, 112, 1933, 1960, 9, 19,
+                         20, 47, 50, 75, 128]
+    cond_general = _df['item_code'].isin(general_cons_list)
+    _df.loc[cond_general, 'item_category'] = 'general'
+
+    # Fill gaps in categories
+    dict_for_missing_categories = {
+        292: 'acute lower respiratory infections', 293: 'acute lower respiratory infections',
+        307: 'reproductive_health', 2019: 'reproductive_health',
+        2678: 'tb', 1171: 'other_childhood_illnesses', 1237: 'cancer', 1239: 'cancer',
+        10: 'reproductive_health', 39: 'reproductive_health', 41: 'reproductive_health',
+        64: 'neonatal_health', 117: 'reproductive_health', 150: 'epi', 151: 'epi',
+        153: 'epi', 155: 'epi', 157: 'epi', 158: 'epi', 175: 'tb', 184: 'tb', 190: 'hiv',
+        197: 'hiv', 216: 'cardiometabolicdisorders', 234: 'cardiometabolicdisorders',
+        261: 'cancer', 280: 'ncds', 285: 'other_childhood_illnesses',
+        286: 'other_childhood_illnesses', 1197: 'epi', 1221: 'other_childhood_illnesses',
+        2064: 'ncds', 2670: 'reproductive_health'}
+    # Use map to create a new series from item_code, then fill missing values in item_category
+    mapped_categories = _df['item_code'].map(dict_for_missing_categories)
+    _df['item_category'] = _df['item_category'].fillna(mapped_categories)
+
+    return _df
+
+
+stkout_df = recategorize_modules_into_consumable_categories(stkout_df)
+item_code_category_mapping = stkout_df[['item_category', 'item_code']].drop_duplicates()
 
 # --- 6.5 Replace district/fac_name/month entries where missing --- #
 for var in ['district', 'fac_name', 'month']:
@@ -822,12 +842,14 @@ def interpolate_missing_with_mean(_ser):
 # Check that there are not missing values
 assert not pd.isnull(full_set_interpolated).any().any()
 
+full_set_interpolated = full_set_interpolated.reset_index().merge(item_code_category_mapping, on='item_code', how='left', validate='m:1')
+
 # --- Check that the exported file has the properties required of it by the model code. --- #
-check_format_of_consumables_file(df=full_set_interpolated.reset_index(), fac_ids=fac_ids)
+check_format_of_consumables_file(df=full_set_interpolated, fac_ids=fac_ids)
 
 # %%
 # Save
-full_set_interpolated.reset_index().to_csv(
+full_set_interpolated.to_csv(
     path_for_new_resourcefiles / "ResourceFile_Consumables_availability_small.csv",
     index=False
 )
@@ -849,7 +871,7 @@ def interpolate_missing_with_mean(_ser):
 hhfa_comparison_df = hhfa_comparison_df.rename({'fac_type_tlo': 'Facility_Level'}, axis=1)
 
 # ii. Collapse final model availability data by facility level
-final_availability_df = full_set_interpolated.reset_index()
+final_availability_df = full_set_interpolated
 mfl = pd.read_csv(resourcefilepath / "healthsystem" / "organisation" / "ResourceFile_Master_Facilities_List.csv")
 final_availability_df = pd.merge(final_availability_df, mfl[['District', 'Facility_Level', 'Facility_ID']],
                                  how="left", on=['Facility_ID'],
@@ -871,7 +893,6 @@ def interpolate_missing_with_mean(_ser):
 size = 10
 comparison_df['consumable_labels'] = comparison_df['consumable_name_tlo'].str[:10]
 
-
 # Define function to draw calibration plots at different levels of disaggregation
 def comparison_plot(level_of_disaggregation, group_by_var, colour):
     comparison_df_agg = comparison_df.groupby([group_by_var],
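The merge added just before the format check is what carries the new item_category column into ResourceFile_Consumables_availability_small.csv, and validate='m:1' makes pandas assert that every item_code maps to exactly one category. A toy sketch of the same pattern (column names follow the resource file; the values are illustrative only):

import pandas as pd

availability = pd.DataFrame({
    'Facility_ID': [0, 0, 1],
    'month': [1, 2, 1],
    'item_code': [2, 2678, 2],
    'available_prop': [0.8, 0.5, 0.6],
})
mapping = pd.DataFrame({'item_category': ['contraception', 'tb'],
                        'item_code': [2, 2678]})

# validate='m:1' raises pandas.errors.MergeError if an item_code appears twice in
# `mapping`, i.e. if drop_duplicates() had left one item_code with two categories.
merged = availability.merge(mapping, on='item_code', how='left', validate='m:1')
print(merged)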
diff --git a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/descriptive_stats.py b/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/descriptive_stats.py
deleted file mode 100644
index fc5c775bce..0000000000
--- a/src/scripts/data_file_processing/healthsystem/consumables/consumable_resource_analyses_with_lmis/descriptive_stats.py
+++ /dev/null
@@ -1,63 +0,0 @@
-"""
-This script generates the consumables availability dataset for regression analysis using the outputs of
-
-consumables_availability_estimation.py and clean_fac_locations.py
-
-and generates descriptive figures and tables.
-"""
-import datetime
-from pathlib import Path
-
-import pandas as pd
-
-# import numpy as np
-# import calendar
-# import copy
-# import matplotlib.pyplot as plt
-# from matplotlib.lines import Line2D
-# from matplotlib import pyplot  # for figures
-# import seaborn as sns
-# import math
-
-# Path to TLO directory
-outputfilepath = Path("./outputs")
-resourcefilepath = Path("./resources")
-path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables"
-
-# Set local Dropbox source
-path_to_dropbox = Path(  # <-- point to the TLO dropbox locally
-    'C:/Users/sm2511/Dropbox/Thanzi la Onse'
-)
-
-path_to_files_in_the_tlo_dropbox = path_to_dropbox / "05 - Resources/Module-healthsystem/consumables raw files/"
-
-# define a timestamp for script outputs
-timestamp = datetime.datetime.now().strftime("_%Y_%m_%d_%H_%M")
-
-# print the start time of the script
-print('Script Start', datetime.datetime.now().strftime('%H:%M'))
-
-# 1. DATA IMPORT AND CLEANING #
-#########################################################################################
-# --- 1.1 Import consumables availability data --- #
-stkout_df = pd.read_csv(path_for_new_resourcefiles / "ResourceFile_Consumables_availability_and_usage.csv",
-                        low_memory=False)
-
-# Drop rows which can't be used in regression analysis
-regsubset_cond1 = stkout_df['data_source'] == 'original_lmis_data'
-regsubset_cond2 = stkout_df['fac_type_tlo'] == 'Facility_level_0'  # only one facility from Mchinji reported in OpenLMIS
-stkout_df_reg = stkout_df[regsubset_cond1 & ~regsubset_cond2]
-
-# Clean some district names to match the Master Health Facility Registry
-rename_districts = {
-    'Nkhota Kota': 'Nkhotakota',
-    'Nkhata bay': 'Nkhata Bay'
-}
-stkout_df['district'] = stkout_df['district'].replace(rename_districts)
-
-# --- 1.2 Import GIS data --- #
-fac_gis = pd.read_csv(path_to_files_in_the_tlo_dropbox / "gis_data/facility_distances.csv")
-
-# --- 1.3 Merge cleaned LMIS data with GIS data --- #
-consumables_df = pd.merge(stkout_df.drop(columns=['district', 'Unnamed: 0']), fac_gis.drop(columns=['Unnamed: 0']),
-                          how='left', on='fac_name')
-consumables_df.to_csv(path_to_files_in_the_tlo_dropbox / 'consumables_df.csv')
diff --git a/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py b/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py
index 7ca04f763f..db6af01154 100644
--- a/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py
+++ b/src/scripts/data_file_processing/healthsystem/consumables/processing_data_from_one_health/generate_consumables_item_codes_and_packages.py
@@ -19,22 +19,20 @@
 import numpy as np
 import pandas as pd
 
-# Set local Dropbox source
-path_to_dropbox = Path(  # <-- point to the TLO dropbox locally
-    # '/Users/tbh03/Dropbox (SPH Imperial College)/Thanzi la Onse Theme 1 SHARE'
-    '/Users/sm2511/Dropbox/Thanzi La Onse')
+# Set local shared folder source
+path_to_share = Path(  # <-- point to the shared folder
+    '/Users/sm2511/Library/CloudStorage/OneDrive-SharedLibraries-ImperialCollegeLondon/TLOModel - WP - Documents/'
+)
 
 resourcefilepath = Path("./resources")
 path_for_new_resourcefiles = resourcefilepath / "healthsystem/consumables"
 
 # EHP Consumables list
-path_to_files_in_the_tlo_dropbox = path_to_dropbox / "05 - Resources/Module-healthsystem/consumables raw files/"
-
-workingfile_ehp_consumables = path_to_dropbox / \
+workingfile_ehp_consumables = path_to_share / \
     "05 - Resources/Module-healthsystem/From Matthias Arnold/ORIGINAL_Intervention input.xlsx"
 
-workingfile_one_health = path_to_dropbox / \
+workingfile_one_health = path_to_share / \
     "07 - Data/OneHealth projection files/OneHealth commodities.xlsx"
 
@@ -157,7 +155,8 @@
 wb = wb.merge(intv_codes, on='Intervention_Pkg', how='left', indicator=True)
 assert (wb['_merge'] == 'both').all()
-wb = wb.drop(columns='_merge')
+wb = wb.drop(columns=['_merge', 'Expected_Units_Per_Case',
+                      'Unit_Cost'])
 
 # Assign a unique code for each individual consumable item
 unique_items = pd.unique(wb['Items'])
@@ -172,9 +171,7 @@
     'Intervention_Pkg',
     'Intervention_Pkg_Code',
     'Items',
-    'Item_Code',
-    'Expected_Units_Per_Case',
-    'Unit_Cost']]
+    'Item_Code']]
 
 assert not pd.isnull(wb).any().any()
@@ -215,7 +212,7 @@ only_in_oh['Intervention_Pkg'] = 'Misc'
 only_in_oh['Intervention_Pkg_Code'] = -99
 only_in_oh['Item_Code'] = np.arange(1000, 1000 + len(only_in_oh))
-only_in_oh['Expected_Units_Per_Case'] = 1.0
+only_in_oh = only_in_oh.drop(columns='Unit_Cost')
 
 assert set(only_in_oh.columns) == set(wb.columns)
 
@@ -249,8 +246,6 @@ def add_record(df: pd.DataFrame, record: Dict):
     assert set(df.columns) == set(record.keys())
     return pd.concat([df, pd.DataFrame.from_records([record])], ignore_index=True)
 
-
-
 cons = add_record(
     cons,
     {
@@ -258,10 +253,7 @@ def add_record(df: pd.DataFrame, record: Dict):
         'Intervention_Pkg': "Misc",
         'Intervention_Pkg_Code': -99,
         'Items': "Forceps, obstetric",
-        'Item_Code': 2669,
-        'Expected_Units_Per_Case': 1.0,
-        'Unit_Cost': 1.0
-    }
+        'Item_Code': 2669}
 )
 
 cons = add_record(
@@ -271,10 +263,7 @@
     {
         'Intervention_Pkg': "Misc",
         'Intervention_Pkg_Code': -99,
         'Items': "Vacuum, obstetric",
-        'Item_Code': 2670,
-        'Expected_Units_Per_Case': 1.0,
-        'Unit_Cost': 1.0
-    },
+        'Item_Code': 2670},
 )
 
 cons = add_record(
@@ -284,10 +273,7 @@
     {
         'Intervention_Pkg': "Misc",
         'Intervention_Pkg_Code': -99,
         'Items': "First-line ART regimen: adult",
-        'Item_Code': 2671,
-        'Expected_Units_Per_Case': 1.0,
-        'Unit_Cost': 1.0
-    },
+        'Item_Code': 2671},
 )
 
 cons = add_record(
@@ -297,10 +283,7 @@
     {
         'Intervention_Pkg': "Misc",
         'Intervention_Pkg_Code': -99,
         'Items': "First line ART regimen: older child",
-        'Item_Code': 2672,
-        'Expected_Units_Per_Case': 1.0,
-        'Unit_Cost': 1.0
-    },
+        'Item_Code': 2672},
 )
 
 cons = add_record(
@@ -310,10 +293,7 @@
     {
         'Intervention_Pkg': "Misc",
         'Intervention_Pkg_Code': -99,
         'Items': "First line ART regimen: young child",
-        'Item_Code': 2673,
-        'Expected_Units_Per_Case': 1.0,
-        'Unit_Cost': 1.0
-    },
+        'Item_Code': 2673},
 )
 
 cons = add_record(
@@ -323,10 +303,7 @@
     {
         'Intervention_Pkg': "Misc",
         'Intervention_Pkg_Code': -99,
         'Items': "Pre-exposure prophlaxis for HIV",
-        'Item_Code': 2674,
-        'Expected_Units_Per_Case': 1.0,
-        'Unit_Cost': 1.0
-    },
+        'Item_Code': 2674},
 )
 
 cons = add_record(
@@ -336,10 +313,7 @@
     {
         'Intervention_Pkg': "Isoniazid preventative therapy for HIV+ no TB",
         'Intervention_Pkg_Code': 82,
         'Items': "Isoniazid/Rifapentine",
-        'Item_Code': 2678,
-        'Expected_Units_Per_Case': 1.0,
-        'Unit_Cost': 1.0
-    },
+        'Item_Code': 2678},
 )
 
 cons = add_record(
@@ -349,9 +323,7 @@
     {
        'Intervention_Pkg': "Misc",
        'Intervention_Pkg_Code': -99,
        'Items': "Cystoscope",
-        'Item_Code': 285,
-        'Expected_Units_Per_Case': 1.0,
-        'Unit_Cost': np.nan},
+        'Item_Code': 285},
 )
 
 cons = add_record(
@@ -360,9 +332,7 @@
     {
        'Intervention_Pkg': "Misc",
        'Intervention_Pkg_Code': -99,
        'Items': "Endoscope",
-        'Item_Code': 280,
-        'Expected_Units_Per_Case': 1.0,
-        'Unit_Cost': np.nan},
+        'Item_Code': 280},
 )
 
 cons = add_record(
@@ -371,9 +341,7 @@
     {
        'Intervention_Pkg': "Misc",
        'Intervention_Pkg_Code': -99,
        'Items': "Prostate specific antigen test",
-        'Item_Code': 281,
-        'Expected_Units_Per_Case': 1.0,
-        'Unit_Cost': np.nan},
+        'Item_Code': 281},
 )
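add_record refuses any record whose keys differ from the table's columns, so once Expected_Units_Per_Case and Unit_Cost are dropped from the frame, every hand-added record above has to shed those keys too, which is what this hunk does. A self-contained sketch of that guard (the helper is copied from the script; the Intervention_Cat value is invented for illustration):

from typing import Dict

import pandas as pd


def add_record(df: pd.DataFrame, record: Dict):
    # Refuse records whose keys do not match the table's columns exactly.
    assert set(df.columns) == set(record.keys())
    return pd.concat([df, pd.DataFrame.from_records([record])], ignore_index=True)


cons = pd.DataFrame(columns=['Intervention_Cat', 'Intervention_Pkg',
                             'Intervention_Pkg_Code', 'Items', 'Item_Code'])
cons = add_record(cons, {'Intervention_Cat': 'Misc',  # hypothetical value
                         'Intervention_Pkg': 'Misc',
                         'Intervention_Pkg_Code': -99,
                         'Items': 'Forceps, obstetric',
                         'Item_Code': 2669})
# A record that still carried 'Unit_Cost' would now fail the assertion.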
diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py
index 674035ad98..1be37c3267 100644
--- a/src/tlo/methods/consumables.py
+++ b/src/tlo/methods/consumables.py
@@ -266,7 +266,7 @@ def _lookup_availability_of_consumables(self,
 
     def on_simulation_end(self):
         """Do tasks at the end of the simulation.
-        
+
         Raise warnings and enter to log about item_codes not recognised.
         """
         if self._not_recognised_item_codes:
@@ -339,7 +339,7 @@ def check_format_of_consumables_file(df, fac_ids):
     months = set(range(1, 13))
     item_codes = set(df.item_code.unique())
 
-    assert set(df.columns) == {'Facility_ID', 'month', 'item_code', 'available_prop'}
+    assert set(df.columns) == {'Facility_ID', 'month', 'item_code', 'item_category', 'available_prop'}
 
     # Check that all permutations of Facility_ID, month and item_code are present
     pd.testing.assert_index_equal(
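With check_format_of_consumables_file now insisting on an item_category column, a regenerated resource file can be spot-checked before committing the new LFS pointer. A minimal sketch (the path is the resource file updated at the top of this diff; reading it requires the git-lfs content, not the pointer):

import pandas as pd

expected = {'Facility_ID', 'month', 'item_code', 'item_category', 'available_prop'}
df = pd.read_csv('resources/healthsystem/consumables/ResourceFile_Consumables_availability_small.csv')
missing, extra = expected - set(df.columns), set(df.columns) - expected
assert not missing and not extra, f'missing={missing}, extra={extra}'
# Mirror the m:1 merge upstream: each item_code should carry a single category.
assert (df.groupby('item_code')['item_category'].nunique(dropna=False) == 1).all()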