sync from local

aleksandra-kim · Nov 23, 2021 · 62f2592 · 62f2592
1 parent 1f4706c
commit 62f2592
Show file tree

Hide file tree

Showing 7 changed files with 39 additions and 132 deletions.
diff --git a/consumption_model_ch/data/__init__.py b/consumption_model_ch/data/__init__.py
@@ -52,9 +52,9 @@ def get_exiobase_linking_data():
     )
 
 
-def get_exiobase_margins_data(year, sheet_name):
+def get_exiobase_margins_data(exiobase_year, sheet_name):
     df = pd.read_excel(
-        dirpath / "CH_{}.xls".format(year),
+        dirpath / "CH_{}.xls".format(exiobase_year),
         sheet_name=sheet_name,
         skiprows=11
     )

diff --git a/consumption_model_ch/extractors/consumption_db.py b/consumption_model_ch/extractors/consumption_db.py
@@ -12,6 +12,7 @@
 from ..data import get_consumption_df
 from ..data import get_agribalyse_df
 from ..utils import get_habe_filepath
+from ..import_databases import create_ecoinvent_33_project
 
 
 # Number of relevant columns in the raw file (df_raw) to extract info about activity
@@ -57,8 +58,9 @@ class ConsumptionDbExtractor(object):
     def extract(
         cls,
         directory,
+        ei33_path,
         name,
-        year='091011',
+        year,
         exclude_databases=(),
         replace_agribalyse_with_ecoinvent=True,
     ):
@@ -87,7 +89,7 @@ def extract(
             True if eggs and fish activities from agribalyse should be replaced with the ones in ecoinvent.
 
         """
-
+        create_ecoinvent_33_project(ei33_path)
         df_brightway, filepath_consumption_excel = cls.get_consumption_df(
             directory,
             name=name,
@@ -155,7 +157,7 @@ def get_consumption_df(
             cls,
             directory,
             name,
-            year='091011',
+            year,
             exclude_databases=(),
             replace_agribalyse_with_ecoinvent=True,
     ):
@@ -183,7 +185,7 @@ def create_consumption_excel(
             cls,
             directory,
             name,
-            year='091011',
+            year,
             exclude_databases=(),
             replace_agribalyse_with_ecoinvent=True,
     ):
@@ -456,7 +458,7 @@ def is_pattern_correct(cls, df_ind_j):
             return 0
 
     @classmethod
-    def append_one_exchange(cls, df, df_ind_j, conversion_dem_to_fu, exclude_dbs=(), replace_agribalyse_with_ei=True):
+    def append_one_exchange(cls, df, df_ind_j, conversion_dem_to_fu, exclude_dbs):
         """Extract info about one input activity, eg name, unit, location, etc and append it to the dataframe df."""
 
         # Extract the activity number
@@ -513,106 +515,3 @@ def append_one_exchange(cls, df, df_ind_j, conversion_dem_to_fu, exclude_dbs=(),
         df = cls.append_exchanges_in_correct_columns(df, input_act_values_dict)
 
         return df
-
-
-# @classmethod
-# def update_all_db(
-#         cls,
-#         df,
-#         update_ecoinvent=True,
-#         update_agribalyse=True,
-#         update_exiobase=True,
-#         use_ecoinvent_371=True,
-# ):
-#     """Update all databases in the consumption database. TODO make more generic"""
-#     if update_ecoinvent:
-#         if use_ecoinvent_371:
-#             ei_name = 'ecoinvent 3.7.1 cutoff'
-#         else:
-#             ei_name = 'ecoinvent 3.6 cutoff'
-#         df = cls.replace_one_db(df, 'ecoinvent 3.3 cutoff', ei_name)
-#     else:
-#         ei_name = 'ecoinvent 3.3 cutoff'
-#     if update_agribalyse:
-#         df = cls.replace_one_db(df, 'Agribalyse 1.2', 'Agribalyse 1.3 - {}'.format(ei_name))
-#     if update_exiobase:
-#         df = cls.replace_one_db(df, 'EXIOBASE 2.2', "Exiobase 3.8.1 Monetary 2015")
-#     return df
-
-
-# def import_consumption_db(
-#     habe_path,
-#     exclude_databases=(),
-#     consumption_db_name=CONSUMPTION_DB_NAME,
-#     habe_year='091011',
-#     ei_name="ecoinvent 3.7.1 cutoff",
-#     write_dir=None,
-#     replace_agribalyse_with_ecoinvent=True,
-#     exiobase_path=None,
-#     sut_path=None,
-# ):
-
-    # if consumption_db_name in bd.databases:
-    #     print(consumption_db_name + " database already present!!! No import is needed")
-    # else:
-    #     # 1. Create write_dir directory if it has not been specified or created.
-    #     # if write_dir is None:
-    #     #     write_dir = Path('write_files') / bd.projects.current.lower().replace(" ", "_")
-    #     # write_dir.mkdir(exist_ok=True, parents=True)
-    #
-    #     # 2. Make sure that 'ecoinvent 3.3 cutoff' project has been created
-    #     if 'ecoinvent 3.3 cutoff' not in bd.projects:
-    #         print(
-    #             'BW project `ecoinvent 3.3 cutoff` is needed, please run `create_ecoinvent_33_project(path_ei33).`'
-    #         )
-    #         return
-    #
-    #     # 3. Create `consumption_db.xlsx` that contains consumption database in the bw excel format.
-    #     # Extract consumption data from supporting information available at https://doi.org/10.1021/acs.est.8b01452.
-    #     consumption_model_path = DATADIR / "es8b01452_si_002.xlsx"
-    #     # Create dataframe that will be our consumption database after we add activities & exchanges from the raw file
-    #     df_bw = create_df_bw(CONSUMPTION_DB_NAME)
-    #     # Read data from the consumption model file
-    #     sheet_name = 'Overview & LCA-Modeling'
-    #     df_raw = pd.read_excel(consumption_model_path, sheet_name=sheet_name, header=2)
-    #     # Extract units from HABE
-    #     code_unit = get_units_habe(habe_path, habe_year)
-    #     # Add ON columns (to fix some formatting issues in the consumption model file)
-    #     df = complete_columns(df_raw)
-    #     # Parse Andi's excel file
-    #
-    #     act_indices = df.index[df['ConversionDem2FU'].notna()].tolist()  # indices of all activities
-    #     exclude_databases = [exclude_db.lower() for exclude_db in exclude_databases]
-    #     path_new_db = write_dir / 'consumption_db.xlsx'
-    #     if not path_new_db.exists():
-    #         print("--> Creating consumption_db.xlsx")
-    #         for ind in act_indices:
-    #             # For each row
-    #             df_ind = df.iloc[ind]
-    #             df_ind = df_ind[df_ind.notna()]
-    #             # Add activity
-    #             df_bw, df_act = append_activity(df_bw, df_ind[:N_ACT_RELEVANT],
-    #                                             code_unit)  # only pass columns relevant to this function
-    #             # Add exchanges
-    #             df_bw = append_exchanges(
-    #                 df_bw,
-    #                 df_ind,
-    #                 df_act,
-    #                 exclude_dbs=exclude_databases,
-    #                 replace_agribalyse_with_ei=replace_agribalyse_with_ecoinvent
-    #             )
-    #         df_bw.columns = list(string.ascii_uppercase[:len(df_bw.columns)])
-    #         # Update to relevant databases and save excel file
-    #         if "3.7.1" in ei_name:
-    #             use_ecoinvent_371 = True
-    #         else:
-    #             use_ecoinvent_371 = False
-    #
-    #         if replace_agribalyse_with_ecoinvent:
-    #             df_agribalyse_ei = pd.read_excel(DATADIR / "agribalyse_replaced_with_ecoinvent.xlsx")
-    #             df_bw = df_bw.append(df_agribalyse_ei, ignore_index=True)
-    #
-    #         df_bw = update_all_db(df_bw, use_ecoinvent_371=use_ecoinvent_371)
-    #         df_bw.to_excel(path_new_db, index=False, header=False)
-
-
diff --git a/consumption_model_ch/import_databases.py b/consumption_model_ch/import_databases.py
@@ -33,12 +33,14 @@ def import_exiobase_3(ex3_path, ex3_name):
         ex.write_database()
 
 
-def import_consumption_db(directory_habe, co_name, exclude_databases=(), exiobase_path=None):
+def import_consumption_db(directory_habe, co_name, year, ei33_path, exclude_databases=(), exiobase_path=None):
     if co_name in bd.databases:
         print(co_name + " database already present!!! No import is needed")
     else:
         co = ConsumptionDbImporter(
             directory_habe,
+            ei33_path=ei33_path,
+            year=year,
             exclude_databases=exclude_databases,
             replace_agribalyse_with_ecoinvent=True,
             exiobase_path=exiobase_path,

diff --git a/consumption_model_ch/importers/consumption_db.py b/consumption_model_ch/importers/consumption_db.py
@@ -12,7 +12,7 @@
 from ..strategies import modify_exchanges, link_exiobase
 
 # Default name of the consumption database
-CONSUMPTION_DB_NAME = 'CH consumption 1.0'
+CONSUMPTION_DB_NAME = 'swiss consumption 1.0'
 
 
 class ConsumptionDbImporter(LCIImporter):
@@ -21,20 +21,23 @@ class ConsumptionDbImporter(LCIImporter):
     def __init__(
         self,
         directory,
+        ei33_path,
         name=None,
         year='091011',
         exclude_databases=(),
         replace_agribalyse_with_ecoinvent=True,
         exiobase_path=None,
     ):
         start = time()
+        self.directory = directory
+        self.ei33_path = ei33_path
         self.db_name = name or CONSUMPTION_DB_NAME
         self.year = year
-        self.directory = directory
         self.df, self.filepath_consumption_excel = ConsumptionDbExtractor.extract(
             directory,
+            ei33_path=ei33_path,
             name=self.db_name,
-            year=year,
+            year=self.year,
             exclude_databases=exclude_databases,
             replace_agribalyse_with_ecoinvent=replace_agribalyse_with_ecoinvent,
         )

diff --git a/consumption_model_ch/strategies/consumption_db.py b/consumption_model_ch/strategies/consumption_db.py
@@ -148,8 +148,8 @@ def modify_exchanges(db, mapping, db_name):
     return db1
 
 
-def get_margins_df(year, margins_label):
-    dataframe = get_exiobase_margins_data(year, margins_label)
+def get_margins_df(exiobase_year, margins_label):
+    dataframe = get_exiobase_margins_data(exiobase_year, margins_label)
     columns = ['Unnamed: 2', 'Final consumption expenditure by households']
     dataframe = dataframe[columns]
     dataframe.columns = ['name', margins_label]
@@ -158,30 +158,30 @@ def get_margins_df(year, margins_label):
     return dataframe
 
 
-def concat_margins_df(year):
+def concat_margins_df(exiobase_year):
     trd_label = 'trade_margins_init'
     tsp_label = 'transport_margins_init'
     tax_label = 'product_taxes_init'
     bpt_label = 'bptot_ini'  # basic price total
     ppt_label = 'purchaser_price'  # purchaser price total
     labels = [trd_label, tsp_label, tax_label, bpt_label, ppt_label]
-    df_trd = get_margins_df(year, trd_label)
-    df_tsp = get_margins_df(year, tsp_label)
-    df_tax = get_margins_df(year, tax_label)
-    df_bpt = get_margins_df(year, bpt_label)
+    df_trd = get_margins_df(exiobase_year, trd_label)
+    df_tsp = get_margins_df(exiobase_year, tsp_label)
+    df_tax = get_margins_df(exiobase_year, tax_label)
+    df_bpt = get_margins_df(exiobase_year, bpt_label)
     df_margins = pd.concat([df_trd, df_tsp, df_tax, df_bpt], axis=1)
     return df_margins, labels
 
 
-def get_margins_shares(year):
+def get_margins_shares(exiobase_year):
     exiobase_381_change_names_data = get_exiobase_migration_data()
     exiobase_381_change_names_dict = {}
     for el in exiobase_381_change_names_data['data']:
         old_name = el[0][0]
         new_name = el[1]['name']
         exiobase_381_change_names_dict[old_name] = new_name
 
-    df_margins, labels = concat_margins_df(year)
+    df_margins, labels = concat_margins_df(exiobase_year)
     trd_label, tsp_label, tax_label, bpt_label, ppt_label = labels
 
     new_index = []
@@ -236,12 +236,12 @@ def link_exiobase(co, ex_name, ex_path):
         filename = "mrFinalDemand_version2.2.2.txt"
         columns = ['Unnamed: 0', 'Unnamed: 1', 'CH']
         chf_to_euro = 0.594290
-        year = 2007
+        exiobase_year = 2007
     elif '3.8.1' in ex_name:
         filename = "Y.txt"
         columns = ['region', 'Unnamed: 1', 'CH']
         chf_to_euro = 0.937234
-        year = 2015
+        exiobase_year = 2015
 
     filepath = Path(ex_path) / filename
 
@@ -288,7 +288,7 @@ def link_exiobase(co, ex_name, ex_path):
         sum_ += value
     exiobase_trade_margin_sectors_dict = {k: v / sum_ for k, v in exiobase_trade_margin_sectors_dict.items()}
 
-    df_margins = get_margins_shares(year)
+    df_margins = get_margins_shares(exiobase_year)
     dict_margins = df_margins.T.to_dict()
 
     mln_to_unit = 1e-6

diff --git a/dev/paper_gsa_realistic_models__import_databases.py b/dev/paper_gsa_realistic_models__import_databases.py
@@ -20,6 +20,7 @@
 path_base = Path('/Users/akim/Documents/LCA_files/')
 fp_gsa_project = path_base / "brightway2-project-GSA-backup.16-November-2021-11-50AM.tar.gz"
 directory_habe = path_base / 'HABE_2017/'
+fp_ei33 = path_base / 'ecoinvent_33_cutoff/datasets'
 
 project = "GSA"
 
@@ -40,7 +41,7 @@
 co_name = "CH consumption 1.0"
 if delete_consumption_db:
     del bd.databases[co_name]
-import_consumption_db(directory_habe, co_name, exclude_databases)
+import_consumption_db(directory_habe, fp_ei33, co_name, exclude_databases)
 co = bd.Database(co_name)
 
 # Add functional units

diff --git a/dev/paper_gsa_realistic_models_ex__import_databases.py b/dev/paper_gsa_realistic_models_ex__import_databases.py
@@ -19,13 +19,15 @@
 
 if __name__ == "__main__":
 
-    delete_consumption_db = False
+    delete_consumption_db = True
     add_functional_units = True
+    habe_year = '151617'
 
     path_base = Path('/Users/akim/Documents/LCA_files/')
     fp_gsa_project = path_base / "brightway2-project-GSA-backup.16-November-2021-11-50AM.tar.gz"
     directory_habe = path_base / 'HABE_2017/'
-    fp_ecoinvent = path_base / "ecoinvent_38_cutoff" / "datasets"
+    fp_ecoinvent_38 = path_base / "ecoinvent_38_cutoff" / "datasets"
+    fp_ecoinvent_33 = path_base / 'ecoinvent_33_cutoff/datasets'
     fp_exiobase = path_base / "exiobase_381_monetary" / "IOT_2015_pxp"
     fp_archetypes = path_base / "heia" / "hh_archetypes_weighted_ipcc_091011.csv"
 
@@ -34,8 +36,8 @@
 
     # Import all databases
     bi.bw2setup()
-    ei38_name = "ecoinvent"
-    import_ecoinvent(fp_ecoinvent, ei38_name)
+    ei38_name = "ecoinvent 3.8"
+    import_ecoinvent(fp_ecoinvent_38, ei38_name)
     ex38_name = "exiobase 3.8.1 monetary"
     import_exiobase_3(fp_exiobase, ex38_name)
 
@@ -47,7 +49,7 @@
     co_name = "CH consumption 1.0"
     if delete_consumption_db and co_name in bd.databases:
         del bd.databases[co_name]
-    import_consumption_db(directory_habe, co_name, exclude_databases, fp_exiobase)
+    import_consumption_db(directory_habe, fp_ecoinvent_33, co_name, habe_year, exclude_databases, fp_exiobase)
     co = bd.Database(co_name)
 
     # Add functional units