From 78a6e3554805ad826537df0509ac067b5e27f713 Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Thu, 24 Oct 2024 13:10:42 -0400 Subject: [PATCH 01/12] edit cmor_mixer to glob files a lil dfferently. other bug fixes. add in new optional argument for additional control over file targeting. we also now have a \"run one file only\" style flag. new test config file fre/tests/test_files/CMORbite_var_list.json contains variables for processing test cases being workd through in run_test_file_cases.py with notes --- fre/cmor/cmor_mixer.py | 155 +++++++++++++------- fre/cmor/frecmor.py | 9 +- fre/tests/test_files/CMORbite_var_list.json | 11 ++ run_test_file_cases.py | 128 ++++++++++++++++ 4 files changed, 250 insertions(+), 53 deletions(-) create mode 100644 fre/tests/test_files/CMORbite_var_list.json create mode 100644 run_test_file_cases.py diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index dd0556f2..bfbec946 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -5,6 +5,7 @@ ''' import os +import glob import json import subprocess from pathlib import Path @@ -15,6 +16,7 @@ # ----- \start consts +DEBUG_MODE_RUN_ONE=True # # ----- \end consts @@ -56,22 +58,24 @@ def copy_nc(in_nc, out_nc): dsout.close() -def get_var_filenames(indir, var_filenames = None): +def get_var_filenames(indir, var_filenames = None, local_var = None): ''' - appends files ending in .nc located within indir to list var_filenames accepts two arguments + appends files ending in .nc located within indir to list var_filenames accepts three arguments indir: string, representing a path to a directory containing files ending in .nc extension var_filenames: list of strings, empty or non-empty, to append discovered filenames to. the object pointed to by the reference var_filenames is manipulated, and so need not be returned. 
+ local_var: string, optional, if not None, will be used for ruling out filename targets ''' if var_filenames is None: var_filenames = [] - var_filenames_all = os.listdir(indir) + filename_pattern='.nc' if local_var is None else f'.{local_var}.nc' + print(f'(get_var_filenames) filename_pattern={filename_pattern}') + var_filenames_all=glob.glob(f'{indir}/*{filename_pattern}') print(f'(get_var_filenames) var_filenames_all={var_filenames_all}') for var_file in var_filenames_all: - if var_file.endswith('.nc'): - var_filenames.append(var_file) - #print(f"(get_var_filenames) var_filenames = {var_filenames}") + var_filenames.append( Path(var_file).name ) + print(f"(get_var_filenames) var_filenames = {var_filenames}") if len(var_filenames) < 1: raise ValueError(f'target directory had no files with .nc ending. indir =\n {indir}') var_filenames.sort() @@ -147,10 +151,10 @@ def create_tmp_dir(outdir): outdir.find("/work") != -1, outdir.find("/net" ) != -1 ] ): print(f'(cmorize_target_var_files) using /local /work /net ( tmp_dir = {outdir}/ )') - tmp_dir = "{outdir}/" + tmp_dir = str( Path("{outdir}/").resolve() ) else: print('(cmorize_target_var_files) NOT using /local /work /net (tmp_dir = outdir/tmp/ )') - tmp_dir = f"{outdir}/tmp/" + tmp_dir = str( Path(f"{outdir}/tmp/").resolve() ) try: os.makedirs(tmp_dir, exist_ok=True) except Exception as exc: @@ -249,11 +253,11 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, ) # read experiment configuration file + print(f"(rewrite_netcdf_file_var) cmor is opening: json_exp_config = {json_exp_config}") cmor.dataset_json(json_exp_config) - print(f"(rewrite_netcdf_file_var) json_exp_config = {json_exp_config}") - print(f"(rewrite_netcdf_file_var) json_table_config = {json_table_config}") - # load variable list (CMOR table) + # load CMOR table + print(f"(rewrite_netcdf_file_var) cmor is opening json_table_config = {json_table_config}") cmor.load_table(json_table_config) #units = proj_table_vars["variable_entry"] [local_var] 
["units"] @@ -404,13 +408,13 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, tmp_dir = create_tmp_dir( outdir ) print(f'(cmorize_target_var_files) will use tmp_dir={tmp_dir}') - print("\n\n==== begin (???) mysterious file movement ====================================") # loop over sets of dates, each one pointing to a file nc_fls = {} for i, iso_datetime in enumerate(iso_datetime_arr): - + print("\n\n==== REEXAMINE THIS file movement ====================================") + # why is nc_fls a filled list/array/object thingy here? see above line #nc_fls[i] = f"{indir}/{name_of_set}.{iso_datetime}.{target_var}.nc" nc_fls[i] = f"{indir}/{name_of_set}.{iso_datetime}.{local_var}.nc" @@ -423,6 +427,7 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, # create a copy of the input file with local var name into the work directory #nc_file_work = f"{tmp_dir}{name_of_set}.{iso_datetime}.{target_var}.nc" nc_file_work = f"{tmp_dir}{name_of_set}.{iso_datetime}.{local_var}.nc" + print(f"(cmorize_target_var_files) nc_file_work = {nc_file_work}") copy_nc( nc_fls[i], nc_file_work) @@ -432,22 +437,37 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, if Path(nc_ps_file).exists(): print(f"(cmorize_target_var_files) nc_ps_file_work = {nc_ps_file_work}") copy_nc(nc_ps_file, nc_ps_file_work) - + print("\n\n==== REEXAMINE THIS file movement ====================================") # now we have a file in our targets, point CMOR to the configs and the input file(s) + + make_cmor_write_here = None + print( Path( tmp_dir ) ) + print( Path( os.getcwd() ) ) + if Path( tmp_dir ).is_absolute(): + print(f'tmp_dir is absolute') + make_cmor_write_here = tmp_dir + elif Path( tmp_dir ).exists(): # relative to where we are + print(f'tmp_dir is relative to CWD!') + make_cmor_write_here = os.getcwd() + '/'+tmp_dir # unavoidable, cmor module FORCES write to CWD + assert make_cmor_write_here is not None + + 
gotta_go_back_here=os.getcwd() + try: + print(f"cd'ing to \n {make_cmor_write_here}" ) + os.chdir( make_cmor_write_here ) + except: + raise OSError(f'could not chdir to {make_cmor_write_here}') + print ("(cmorize_target_var_files) calling rewrite_netcdf_file_var") - gotta_go_back_here=os.getcwd()+'/' - os.chdir(gotta_go_back_here+tmp_dir) # this is unavoidable, cmor module FORCES write to CWD - local_file_name = rewrite_netcdf_file_var( proj_table_vars , - local_var , - gotta_go_back_here + nc_file_work , - target_var , - gotta_go_back_here + json_exp_config , - gotta_go_back_here + json_table_config)#, -# gotta_go_back_here + tmp_dir ) + local_file_name = rewrite_netcdf_file_var( proj_table_vars , + local_var , + nc_file_work , + target_var , + json_exp_config , + json_table_config ) os.chdir(gotta_go_back_here) - assert Path( gotta_go_back_here+tmp_dir+local_file_name ).exists() - #assert False + # now that CMOR has rewritten things... we can take our post-rewriting actions # the final output filename will be... @@ -465,7 +485,7 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, print(f'(cmorize_target_var_files) WARNING: directory {filedir} already exists!') # hmm.... 
this is making issues for pytest - mv_cmd = f"mv {tmp_dir}{local_file_name} {filedir}" + mv_cmd = f"mv {tmp_dir}/{local_file_name} {filedir}" print(f"(cmorize_target_var_files) moving files...\n {mv_cmd}") subprocess.run(mv_cmd, shell=True, check=True) @@ -489,6 +509,10 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, if Path(nc_ps_file_work).exists(): Path(nc_ps_file_work).unlink() + if DEBUG_MODE_RUN_ONE: + print(f'WARNING: DEBUG_MODE_RUN_ONE is True!!!!') + print(f'WARNING: done processing one file!!!') + break @@ -497,9 +521,10 @@ def cmor_run_subtool( indir = None, json_var_list = None, json_table_config = None, json_exp_config = None , - outdir = None): + outdir = None, opt_var_name = None + ): ''' - primary steering function for the cmor_mixer tool, i.e essentially main. Accepts five args: + primary steering function for the cmor_mixer tool, i.e essentially main. Accepts six args: indir: string, directory containing netCDF files. keys specified in json_var_list are local variable names used for targeting specific files json_var_list: string, path pointing to a json file containing directory of key/value @@ -513,7 +538,10 @@ def cmor_run_subtool( indir = None, json_exp_config: json file containing other configuration details (FILL IN TO DO #TODO) outdir: string, directory root that will contain the full output and output directory structure generated by the cmor module upon request. + opt_var_name: string, optional, specify a variable name to specifically process only filenames matching + that variable name. I.e., this string help target local_vars, not target_vars. 
''' + print(locals()) if None in [indir, json_var_list, json_table_config, json_exp_config, outdir]: raise ValueError(f'all input arguments are required!\n' '[indir, json_var_list, json_table_config, json_exp_config, outdir] = \n' @@ -521,16 +549,20 @@ def cmor_run_subtool( indir = None, '{json_exp_config}, {outdir}]' ) # open CMOR table config file - print('(cmor_run_subtool) getting table variables from json_table_config') + print( '(cmor_run_subtool) getting table variables from json_table_config = \n' + f' {json_table_config}' ) try: with open( json_table_config, "r", encoding = "utf-8") as table_config_file: proj_table_vars=json.load(table_config_file) - + except Exception as exc: raise FileNotFoundError( f'ERROR: json_table_config file cannot be opened.\n' f' json_table_config = {json_table_config}' ) from exc + # now resolve the json_table_config path after confirming it can be open + json_table_config= str( Path(json_table_config).resolve() ) + # open input variable list print('(cmor_run_subtool) opening variable list json_var_list') try: @@ -542,31 +574,30 @@ def cmor_run_subtool( indir = None, f'ERROR: json_var_list file cannot be opened.\n' f' json_var_list = {json_var_list}' ) from exc - # examine input directory to obtain a list of input file targets - var_filenames = [] - get_var_filenames(indir, var_filenames) - print(f"(cmor_run_subtool) found filenames = \n {var_filenames}") - - # examine input files to obtain target date ranges - iso_datetime_arr = [] - get_iso_datetimes(var_filenames, iso_datetime_arr) - print(f"(cmor_run_subtool) found iso datetimes = \n {iso_datetime_arr}") - - # name_of_set == component label... - # which is not relevant for CMOR/CMIP... or is it? - name_of_set = var_filenames[0].split(".")[0] - print(f"(cmor_run_subtool) setting name_of_set = {name_of_set}") - + # make sure the exp config exists too while we're at it... 
+ if Path(json_exp_config).exists(): # if so, resolve to absolute path + json_exp_config = str( Path( json_exp_config).resolve() ) + else: + raise FileNotFoundError( + f'ERROR: json_exp_config file cannot be opened.\n' + f' json_exp_config = {json_exp_config}' ) + # loop over entries in the json_var_list, read into var_list for local_var in var_list: # if its not in the table configurations variable_entry list, skip if var_list[local_var] not in proj_table_vars["variable_entry"]: - print(f"(cmor_run_subtool) WARNING: skipping local_var={local_var} /" - f" target_var={target_var}") + print(f"(cmor_run_subtool) WARNING: skipping local_var = {local_var} /\n" + f" target_var = {var_list[local_var]}") print( "(cmor_run_subtool) ... target_var not found in CMOR variable group") continue + if all( [ opt_var_name is not None, + local_var != opt_var_name ] ): + print(f'(cmor_run_subtool) WARNING: skipping local_var={local_var} as it is not equal' + ' to the opt_var_name argument.') + continue + # it is in there, get the name of the data inside the netcdf file. target_var=var_list[local_var] # often equiv to local_var but not necessarily. if local_var != target_var: @@ -575,8 +606,26 @@ def cmor_run_subtool( indir = None, print(f'i am expecting {local_var} to be in the filename, and i expect the variable' f' in that file to be {target_var}') - print(f'(cmor_run_subtool) ..............beginning CMORization for {local_var}/' - f'{target_var}..........') + + # examine input directory to obtain a list of input file targets + var_filenames = [] + get_var_filenames(indir, var_filenames, local_var) + print(f"(cmor_run_subtool) found filenames = \n {var_filenames}") + + # examine input files to obtain target date ranges + iso_datetime_arr = [] + get_iso_datetimes(var_filenames, iso_datetime_arr) + print(f"(cmor_run_subtool) found iso datetimes = \n {iso_datetime_arr}") + + # name_of_set == component label... + # which is not relevant for CMOR/CMIP... or is it? 
+ name_of_set = var_filenames[0].split(".")[0] + print(f"(cmor_run_subtool) setting name_of_set = {name_of_set}") + + + + print(f'(cmor_run_subtool) ..............beginning CMORization for {local_var}/\n' + f' {target_var}..........') cmorize_target_var_files( indir, target_var, local_var, iso_datetime_arr, # OK name_of_set, json_exp_config, @@ -584,13 +633,17 @@ def cmor_run_subtool( indir = None, proj_table_vars, json_table_config # a little redundant ) + if DEBUG_MODE_RUN_ONE: + print(f'WARNING: DEBUG_MODE_RUN_ONE is True. breaking var_list loop') + break + @click.command() def _cmor_run_subtool(indir = None, json_var_list = None, json_table_config = None, json_exp_config = None, - outdir = None): + outdir = None, opt_var_name = None): ''' entry point to fre cmor run for click. see cmor_run_subtool for argument descriptions.''' - return cmor_run_subtool(indir, json_var_list, json_table_config, json_exp_config, outdir) + return cmor_run_subtool(indir, json_var_list, json_table_config, json_exp_config, outdir, opt_var_name) if __name__ == '__main__': diff --git a/fre/cmor/frecmor.py b/fre/cmor/frecmor.py index f764a546..7815b1ac 100644 --- a/fre/cmor/frecmor.py +++ b/fre/cmor/frecmor.py @@ -29,8 +29,12 @@ def cmor_cli(): type=str, help="Output directory", required=True) +@click.option('-v', "--opt_var_name", + type = str, + help="optional variable name filter. 
if the variable name in the targeted file doesnt match, the variable is skipped", + required=False) @click.pass_context -def run(context, indir, varlist, table_config, exp_config, outdir): +def run(context, indir, varlist, table_config, exp_config, outdir, opt_var_name): # pylint: disable=unused-argument """Rewrite climate model output""" context.invoke( @@ -39,7 +43,8 @@ def run(context, indir, varlist, table_config, exp_config, outdir): json_var_list = varlist, json_table_config = table_config, json_exp_config = exp_config, - outdir = outdir + outdir = outdir, + opt_var_name = opt_var_name ) # context.forward( # _cmor_run_subtool() ) diff --git a/fre/tests/test_files/CMORbite_var_list.json b/fre/tests/test_files/CMORbite_var_list.json new file mode 100644 index 00000000..5b75e54f --- /dev/null +++ b/fre/tests/test_files/CMORbite_var_list.json @@ -0,0 +1,11 @@ +{ + "lai": "lai", + "t_ref": "t_ref", + "cl": "cl", + "mc": "mc", + "ta": "ta", + "sos": "sos", + "so": "so", + "ch4global": "ch4global", + "gppLut": "gppLut" +} diff --git a/run_test_file_cases.py b/run_test_file_cases.py new file mode 100644 index 00000000..27f83ae8 --- /dev/null +++ b/run_test_file_cases.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python +''' +this is a quick and dirty script. +it will not be maintained. it will not be supported. +it is for a very context-dependent set of tests for a very specific point in time. 
+''' + + +import os +from pathlib import Path + +import fre +from fre.cmor.cmor_mixer import cmor_run_subtool as run_cmor + +# global consts for these tests, with no/trivial impact on the results +ROOTDIR='fre/tests/test_files' +CMORBITE_VARLIST=f'{ROOTDIR}/CMORbite_var_list.json' + +# this file exists basically for users to specify their own information to append to the netcdf file +# i.e., it fills in FOO/BAR/BAZ style values, and what they are currently is totally irrelevant +EXP_CONFIG_DEFAULT=f'{ROOTDIR}/CMOR_input_example.json' # this likely is not sufficient + + +def run_cmor_RUN(filename, table, opt_var_name): + run_cmor( + indir = str(Path(filename).parent), + json_var_list = CMORBITE_VARLIST, + json_table_config = f'{ROOTDIR}/cmip6-cmor-tables/Tables/CMIP6_{table}.json', + json_exp_config = EXP_CONFIG_DEFAULT, + outdir = os.getcwd(), # dont keep it this way... + opt_var_name = opt_var_name + ) + return + + +## 1) +## land, Lmon, gr1 +## Result - one file debug mode success, but the exp_config has the wrong grid, amongst other thinhgs?> +#testfile_land_gr1_Lmon = \ +# '/archive/Eric.Stofferahn/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/land/ts/monthly/5yr/land.005101-005512.lai.nc' +#run_cmor_RUN(testfile_land_gr1_Lmon, 'Lmon', opt_var_name = 'lai') +##assert False + + +## This file's variable isn't in any cmip6 table... +#### atmos, Amon, gr1 +#### Result - one file debug mode, NULL +###testfile_atmos_gr1_Amon = \ +### '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/atmos/ts/monthly/5yr/atmos.000101-000512.t_ref.nc' +###run_cmor_RUN(testfile_atmos_gr1_Amon, 'Amon', opt_var_name = 't_ref') +###assert False + +## 2) +## native vertical atmos, (Amon, AERmon: gr1), just like above, but with nontrivial vertical levels? 
+## this one is more typical, on the FULL ATMOS LEVELS +## Amon / cl +## Result - error, UnboundLocalError: local variable 'cmor_lev' referenced before assignment (ps file handing double check!!!) +## WITH BUG: problematic file path in copy nc... /home/Ian.Laflotte/Working/fre-cli/tmpocean_monthly_1x1deg.185001-185412.sos.n, +#testfile_atmos_level_cmip_gr1_Amon_complex_vert = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_level_cmip/ts/monthly/5yr/atmos_level_cmip.196001-196412.cl.nc' +#run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_complex_vert, 'Amon', opt_var_name = 'cl') +#assert False + +## 3) +## this one is on the ATMOS HALF-LEVELS +## Amon / mc +## Result - error, UnboundLocalError: local variable 'cmor_lev' referenced before assignment (ps file handing double check!!!) +## WITH BUG: problematic file path in copy nc... /home/Ian.Laflotte/Working/fre-cli/tmpatmos_level_cmip.185001-185412.mc.nc +#testfile_atmos_level_cmip_gr1_Amon_fullL = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_level_cmip/ts/monthly/5yr/atmos_level_cmip.195501-195912.mc.nc' +#run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_fullL, 'Amon', opt_var_name = 'mc') +#assert False + +## 4) +## zonal averages. AmonZ... no AmonZ table though??? +## !!!REPLACING AmonZ w/ Amon!!! +## just like #1, but lack longitude +## Result - error, lat/lon hardcoding as chris was saying would break: File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", line 195, in rewrite_netcdf_file_var lon = ds["lon"][:] File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lon not found in / +## WITH BUG: problematic file path in copy nc... 
/home/Ian.Laflotte/Working/fre-cli/tmpatmos_plev39_cmip.185001-185412.ta.nc +#testfile_atmos_gr1_AmonZ_nolons = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_plev39_cmip/ts/monthly/5yr/zonavg/atmos_plev39_cmip.201001-201412.ta.nc' +#run_cmor_RUN(testfile_atmos_gr1_AmonZ_nolons, 'Amon', opt_var_name = 'ta') +#assert False + +# 5) +# ocean regridded, gr. seaice could be slightly different (Omon?) #TODO +# Result - success WITH BUG: problematic file path in copy nc... /home/Ian.Laflotte/Working/fre-cli/tmpocean_monthly_1x1deg.185001-185412.sos.n, +testfile_ocean_monthly_1x1deg_gr = \ + '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/ocean_monthly_1x1deg/ts/monthly/5yr/ocean_monthly_1x1deg.190001-190412.sos.nc' +run_cmor_RUN(testfile_ocean_monthly_1x1deg_gr, 'Omon', opt_var_name = 'sos') +assert False + +# ocean native, gn. seaice could be slightly different (Omon?) #TODO +testfile_ocean_monthly_gn = \ + '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly/ts/monthly/5yr/ocean_monthly.002101-002512.sos.nc' +run_cmor_RUN(testfile_, '', opt_var_name = 'sos') +assert False + +# 6) +# ocean 3D, either. seaice could be slightly different (Omon?) #TODO +# just like #4 and #5, analogous to #2 (this is kinda funny... zonal averaged, horizontally regridded but maybe not, w/ native vertical levels (half or full?)? +# this one is regridded (1x1 deg was regrid above so it's not the native resolution) +# Result - , +testfile_ocean_monthly_z_1x1deg_gr = \ + '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly_z_1x1deg/ts/monthly/5yr/ocean_monthly_z_1x1deg.000101-000512.so.nc' +run_cmor_RUN(testfile_, '', opt_var_name = 'so') +assert False + +# 7) +# global scalars, gn, e.g. 
Amon +# lack longitude and latitude +# Result - , +testfile_atmos_scalar_gn_Amon_nolon_nolat = \ + '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_scalar/ts/monthly/5yr/atmos_scalar.197001-197412.ch4global.nc' +run_cmor_RUN(testfile_, '', opt_var_name = 'ch4global') +assert False + +# 8) +# phase 2L landuse land output, gr1, e.g. Emon +# “landuse” as a dimension +# Result - , +testfile_LUmip_refined_gr1_Emon_landusedim = \ + '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/LUmip_refined/ts/monthly/5yr/LUmip_refined.185001-185412.gppLut.nc' +run_cmor_RUN(testfile_, '', opt_var_name = None) +assert False + + + From 493c4dd882ffb73be60f54985161331e4604c557 Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Thu, 24 Oct 2024 13:40:38 -0400 Subject: [PATCH 02/12] having run all test cases, a theme of failures is starting to emerge... --- run_test_file_cases.py | 86 +++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/run_test_file_cases.py b/run_test_file_cases.py index 27f83ae8..a22c9d9e 100644 --- a/run_test_file_cases.py +++ b/run_test_file_cases.py @@ -33,7 +33,7 @@ def run_cmor_RUN(filename, table, opt_var_name): return -## 1) +## 1) SUCCEEDs ## land, Lmon, gr1 ## Result - one file debug mode success, but the exp_config has the wrong grid, amongst other thinhgs?> #testfile_land_gr1_Lmon = \ @@ -42,15 +42,7 @@ def run_cmor_RUN(filename, table, opt_var_name): ##assert False -## This file's variable isn't in any cmip6 table... 
-#### atmos, Amon, gr1 -#### Result - one file debug mode, NULL -###testfile_atmos_gr1_Amon = \ -### '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/atmos/ts/monthly/5yr/atmos.000101-000512.t_ref.nc' -###run_cmor_RUN(testfile_atmos_gr1_Amon, 'Amon', opt_var_name = 't_ref') -###assert False - -## 2) +## 2) FAIL ## native vertical atmos, (Amon, AERmon: gr1), just like above, but with nontrivial vertical levels? ## this one is more typical, on the FULL ATMOS LEVELS ## Amon / cl @@ -61,7 +53,7 @@ def run_cmor_RUN(filename, table, opt_var_name): #run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_complex_vert, 'Amon', opt_var_name = 'cl') #assert False -## 3) +## 3) FAIL ## this one is on the ATMOS HALF-LEVELS ## Amon / mc ## Result - error, UnboundLocalError: local variable 'cmor_lev' referenced before assignment (ps file handing double check!!!) @@ -71,7 +63,7 @@ def run_cmor_RUN(filename, table, opt_var_name): #run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_fullL, 'Amon', opt_var_name = 'mc') #assert False -## 4) +## 4) FAIL ## zonal averages. AmonZ... no AmonZ table though??? ## !!!REPLACING AmonZ w/ Amon!!! ## just like #1, but lack longitude @@ -82,46 +74,52 @@ def run_cmor_RUN(filename, table, opt_var_name): #run_cmor_RUN(testfile_atmos_gr1_AmonZ_nolons, 'Amon', opt_var_name = 'ta') #assert False -# 5) -# ocean regridded, gr. seaice could be slightly different (Omon?) #TODO -# Result - success WITH BUG: problematic file path in copy nc... /home/Ian.Laflotte/Working/fre-cli/tmpocean_monthly_1x1deg.185001-185412.sos.n, -testfile_ocean_monthly_1x1deg_gr = \ - '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/ocean_monthly_1x1deg/ts/monthly/5yr/ocean_monthly_1x1deg.190001-190412.sos.nc' -run_cmor_RUN(testfile_ocean_monthly_1x1deg_gr, 'Omon', opt_var_name = 'sos') -assert False +## 5) PARTIAL FAIL +## ocean regridded, gr. seaice could be slightly different (Omon?) 
#TODO +## Result - success WITH BUG: problematic file path in copy nc... /home/Ian.Laflotte/Working/fre-cli/tmpocean_monthly_1x1deg.185001-185412.sos.n, +#testfile_ocean_monthly_1x1deg_gr = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/ocean_monthly_1x1deg/ts/monthly/5yr/ocean_monthly_1x1deg.190001-190412.sos.nc' +#run_cmor_RUN(testfile_ocean_monthly_1x1deg_gr, 'Omon', opt_var_name = 'sos') +#assert False -# ocean native, gn. seaice could be slightly different (Omon?) #TODO -testfile_ocean_monthly_gn = \ - '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly/ts/monthly/5yr/ocean_monthly.002101-002512.sos.nc' -run_cmor_RUN(testfile_, '', opt_var_name = 'sos') -assert False +## ocean native, gn. seaice could be slightly different (Omon?) #TODO +## Result - error, AttributeError: NetCDF: Attempt to define fill value when data already exists. +#testfile_ocean_monthly_gn = \ +# '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly/ts/monthly/5yr/ocean_monthly.002101-002512.sos.nc' +#run_cmor_RUN(testfile_ocean_monthly_gn, 'Omon', opt_var_name = 'sos') +#assert False -# 6) -# ocean 3D, either. seaice could be slightly different (Omon?) #TODO -# just like #4 and #5, analogous to #2 (this is kinda funny... zonal averaged, horizontally regridded but maybe not, w/ native vertical levels (half or full?)? -# this one is regridded (1x1 deg was regrid above so it's not the native resolution) -# Result - , -testfile_ocean_monthly_z_1x1deg_gr = \ - '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly_z_1x1deg/ts/monthly/5yr/ocean_monthly_z_1x1deg.000101-000512.so.nc' -run_cmor_RUN(testfile_, '', opt_var_name = 'so') -assert False +## 6) FAIL +## ocean 3D, either. seaice could be slightly different (Omon?) #TODO +## just like #4 and #5, analogous to #2 (this is kinda funny... 
zonal averaged, horizontally regridded but maybe not, w/ native vertical levels (half or full?)? +## this one is regridded (1x1 deg was regrid above so it's not the native resolution) +## Result - error, AttributeError: NetCDF: Attempt to define fill value when data already exists +#testfile_ocean_monthly_z_1x1deg_gr = \ +# '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly_z_1x1deg/ts/monthly/5yr/ocean_monthly_z_1x1deg.000101-000512.so.nc' +#run_cmor_RUN(testfile_ocean_monthly_z_1x1deg_gr, 'Omon', opt_var_name = 'so') +#assert False -# 7) -# global scalars, gn, e.g. Amon -# lack longitude and latitude -# Result - , -testfile_atmos_scalar_gn_Amon_nolon_nolat = \ - '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_scalar/ts/monthly/5yr/atmos_scalar.197001-197412.ch4global.nc' -run_cmor_RUN(testfile_, '', opt_var_name = 'ch4global') -assert False +## 7) FAIL +## global scalars, gn, e.g. Amon +## lack longitude and latitude +## Result - error, File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lat not found in / +#testfile_atmos_scalar_gn_Amon_nolon_nolat = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_scalar/ts/monthly/5yr/atmos_scalar.197001-197412.ch4global.nc' +#run_cmor_RUN(testfile_atmos_scalar_gn_Amon_nolon_nolat, 'Amon', opt_var_name = 'ch4global') +#assert False -# 8) +# 8) FAIL # phase 2L landuse land output, gr1, e.g. 
Emon # “landuse” as a dimension -# Result - , +# Result - error, File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", line 134, in get_vertical_dimension if not (ds[dim].axis and ds[dim].axis == "Z"): +# File "src/netCDF4/_netCDF4.pyx", line 4932, in netCDF4._netCDF4.Variable.__getattr__ +# File "src/netCDF4/_netCDF4.pyx", line 4654, in netCDF4._netCDF4.Variable.getncattr +# File "src/netCDF4/_netCDF4.pyx", line 1617, in netCDF4._netCDF4._get_att +# File "src/netCDF4/_netCDF4.pyx", line 2113, in netCDF4._netCDF4._ensure_nc_success +# AttributeError: NetCDF: Attribute not found testfile_LUmip_refined_gr1_Emon_landusedim = \ '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/LUmip_refined/ts/monthly/5yr/LUmip_refined.185001-185412.gppLut.nc' -run_cmor_RUN(testfile_, '', opt_var_name = None) +run_cmor_RUN(testfile_LUmip_refined_gr1_Emon_landusedim, 'Emon', opt_var_name = 'gppLut') assert False From 153a779a7ae612a404a817e88a093397f593e299 Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Fri, 25 Oct 2024 12:24:43 -0400 Subject: [PATCH 03/12] add print statement to wrapper function for easier cli debugging calls --- run_test_file_cases.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/run_test_file_cases.py b/run_test_file_cases.py index a22c9d9e..04234bc7 100644 --- a/run_test_file_cases.py +++ b/run_test_file_cases.py @@ -6,6 +6,7 @@ ''' +import sys import os from pathlib import Path @@ -22,7 +23,18 @@ def run_cmor_RUN(filename, table, opt_var_name): - run_cmor( + func_debug = True + if func_debug: + print('run_cmor(' + f' indir = {str(Path(filename).parent)},' + f' json_var_list = {CMORBITE_VARLIST},' + f' json_table_config = {ROOTDIR}/cmip6-cmor-tables/Tables/CMIP6_{table}.json,' + f' json_exp_config = {EXP_CONFIG_DEFAULT},' + f' outdir = {os.getcwd()},' + f' opt_var_name = opt_var_name' + ')' + ) + FOO_return = run_cmor( indir = str(Path(filename).parent), json_var_list = 
CMORBITE_VARLIST, json_table_config = f'{ROOTDIR}/cmip6-cmor-tables/Tables/CMIP6_{table}.json', @@ -30,16 +42,18 @@ def run_cmor_RUN(filename, table, opt_var_name): outdir = os.getcwd(), # dont keep it this way... opt_var_name = opt_var_name ) - return + return FOO_return -## 1) SUCCEEDs -## land, Lmon, gr1 -## Result - one file debug mode success, but the exp_config has the wrong grid, amongst other thinhgs?> -#testfile_land_gr1_Lmon = \ -# '/archive/Eric.Stofferahn/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/land/ts/monthly/5yr/land.005101-005512.lai.nc' -#run_cmor_RUN(testfile_land_gr1_Lmon, 'Lmon', opt_var_name = 'lai') -##assert False +# 1) SUCCEEDs +# land, Lmon, gr1 +# Result - one file debug mode success, but the exp_config has the wrong grid, amongst other thinhgs?> +testfile_land_gr1_Lmon = \ + '/archive/Eric.Stofferahn/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/land/ts/monthly/5yr/land.005101-005512.lai.nc' +some_return = run_cmor_RUN(testfile_land_gr1_Lmon, 'Lmon', opt_var_name = 'lai') +print(f'some_return={some_return}') +sys.exit() +#assert False ## 2) FAIL From 62852833ecc7f42c3fbae1433fbaf341bba469cb Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Fri, 25 Oct 2024 12:27:53 -0400 Subject: [PATCH 04/12] add comments AND constants detailing what kind of hardcoded things the code currently relies on. unsure how useful the exact way i wrote things down is, but im hoping to leverage it for structure and or setting up some formulaic approaches and or rules. 
additionally, merge multiline printstateaments into one call using legal but maybe awkward looking multiline syntax, some other readability edits too --- fre/cmor/cmor_mixer.py | 231 ++++++++++++++++++++++++++++------------- 1 file changed, 158 insertions(+), 73 deletions(-) diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index bfbec946..65a71469 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -14,9 +14,106 @@ import click import cmor +# ------ \start assumptions / potential future configuration thingies. +# GLOBAL hardcoded assumption: netcdf files always have an ending of '.nc' +# many implicit assumptions regarding the presence of metadata in the input netcdf file name +# e.g. datetime, variable name, name_of_set are all assumed to be at particular spots +# utf8 encoding +# MINOR: opening netcdf files in append or write or read +# MINOR: key names in certain input configuration files- these are tightly controlled +# +# for check_dataset_for_ocean_grids: +# input reading/checking hardcode - dataset doesn't have a variable named 'xh' +# for get_vertical_dimension: +# input reading/checking hardcode - dataset has dimension/axis name 'Z' +# for create_tmp_dir: +# input reading/checking hardcode - check output directory for specific drives local2, work, net +# output moving/writing hardcode - tmpdir name is 'tmp' if condition met +# +# for rewrite_netcdf_file_var: +# input reading/checking hardcode - dimensions named 'lat', 'lon', 'time' +# input reading/checking hardcode - "bounds" for above, named 'lat_bnds', 'lon_bnds', 'time_bnds' +# input reading/checking hardcode - check that var_dim is 3 or 4 +# input reading/checking hardcode - check that var_dim is 3 --> simple 3 dim subcase +# input reading/checking hardcode - if var_dim is 4, vert_dim must be one of the following: +# "plev30", "plev19", "plev8","height2m", "level", "lev", "levhalf" +# input reading/checking hardcode - then subcases are relevant as follows: +# if vert_dim in 
["plev30", "plev19", "plev8", "height2m"] --> SUBCASE +# elif vert_dim in ["level", "lev", "levhalf"] --> DISTINCT SUBCASE +# pressure input file is tagged with 'ps' potentially nearby +# sub_sub_case --> if vert_dim is lev_half +# input ds has zfactor values as 'ap_bnds', 'b_bnds' +# output moving/writing hardcode - output zfactors have names "ap_half", "b_half", +# output vertical level axis name "alternate_hybrid_sigma_half" +# sub_sub_case --> else +# input ds has zfactor values as 'ap' and 'b', and zfactor bnds as 'ap_bnds', 'b_bnds' +# output moving/writing hardcode - output zfactors have names "ap", "b", +# output vertical level axis name "alternate_hybrid_sigma" +# output moving/writing hardcode - output interpolated pressures have name "ps", units "Pa" +# output moving/writing hardcode - cmor setup parameters +# output moving/writing hardcode - lat/lon axies named "latitude"/"longitude" with units "degrees_N" "degrees_E" +# +# for cmorize_target_var_files: +# input reading/checking hardcode - pressure input file is tagged with 'ps' potentially nearby +# output moving/writing hardcode - pressure out file is named with 'ps' +# + + # ----- \start consts -DEBUG_MODE_RUN_ONE=True # +DEBUG_MODE_RUN_ONE = True + +# +INPUT_READ_PS_FILE_VAR_NAME = 'ps' +INPUT_READDIR_NAME_CHECKS = [ [ 'contains', '/work'], + ['contains', '/net'], + ['equal', '/local2'] ] + +# +OUTPUT_TEMP_DIR_NAME = 'tmp' + +# +INPUT_READ_OCEAN_GRID_VAR_NAME = 'xh' +INPUT_READ_Z_AXIS_NAME = 'Z' + +# +INPUT_READ_LAT_DIM = 'lat' +INPUT_READ_LAT_BNDS = 'lat_bnds' +INPUT_READ_LON_DIM = 'lon' +INPUT_READ_LON_BNDS = 'lon_bnds' +INPUT_READ_TIME_DIM = 'time' +INPUT_READ_TIME_BNDS = 'time_bnds' + +# +INPUT_ACCEPT_VAR_DIMS = [3,4] +INPUT_ACCEPT_VERT_DIMS = ["plev30", "plev19", "plev8","height2m", "level", "lev", "levhalf"] + +# ---- +#INPUT_SUBCASE1_VAR_VERT_DIMS = { "4": [ "plev30", "plev19", "plev8", "height2m" ] } + +# ---- +INPUT_SUBCASE2_VAR_VERT_DIMS = { "4": [ "level", "lev", "levhalf" ] } +#- 
+OUTPUT_SUBCASE2_PS_VAR_NAME = 'ps' +OUTPUT_SUBCASE2_PS_VAR_UNIT = 'Pa' + +# --- --- +INPUT_SUBCASE2_0_VAR_VERT_DIMS = { "4": [ "levhalf" ] } +INPUT_SUBCASE2_0_ZFACT_VALS = ['ap_bnds','b_bnds'] +#- +OUTPUT_SUBCASE2_0_ZFACT_VAL_NAMES = ['ap_half','b_half'] +OUTPUT_SUBCASE2_0_VERT_LVL_NAME = 'altername_hybrid_sigma_half' + +# --- --- +INPUT_SUBCASE2_1_VAR_VERT_DIMS = { "4": [ "level", "lev" ] } +INPUT_SUBCASE2_1_ZFACT_VALS = ['ap','b'] +INPUT_SUBCASE2_1_ZFACT_BNDS = ['ap_bnds','b_bnds'] +#- +OUTPUT_SUBCASE2_0_ZFACT_VAL_NAMES = ['ap','b'] +OUTPUT_SUBCASE2_0_VERT_LVL_NAME = 'altername_hybrid_sigma' + + + # ----- \end consts @@ -28,8 +125,8 @@ def copy_nc(in_nc, out_nc): in_nc: string, path to an input netcdf file we wish to copy out_nc: string, an output path to copy the targeted input netcdf file to ''' - print(f'(copy_nc) in_nc: {in_nc}') - print(f'(copy_nc) out_nc: {out_nc}') + print(f'(copy_nc) in_nc: {in_nc}\n' + f' out_nc: {out_nc}') # input file dsin = nc.Dataset(in_nc) @@ -100,7 +197,7 @@ def get_iso_datetimes(var_filenames, iso_datetime_arr = None): iso_datetime_arr.sort() #print(f"(get_iso_datetimes) Available dates: {iso_datetime_arr}") if len(iso_datetime_arr) < 1: - raise ValueError('ERROR: iso_datetime_arr has length 0!') + raise ValueError('(get_iso_datetimes) ERROR: iso_datetime_arr has length 0!') def check_dataset_for_ocean_grid(ds): ''' @@ -108,12 +205,10 @@ def check_dataset_for_ocean_grid(ds): one argument. this function has no return. ds: netCDF4.Dataset object containing variables with associated dimensional information. ''' - #print(f'(check_dataset_for_ocean_grid) {ds}') - #print(f'(check_dataset_for_ocean_grid) {ds.variables}') - #print(f'(check_dataset_for_ocean_grid) {ds.variables.keys()}') if "xh" in list(ds.variables.keys()): raise NotImplementedError( - "'xh' found in var_list. ocean grid req'd but not yet unimplemented. stop.") + "(check_dataset_for_ocean_grid) 'xh' found in var_list. ocean grid req'd but not yet unimplemented. 
stop.") + def get_vertical_dimension(ds,target_var): ''' @@ -136,7 +231,6 @@ def get_vertical_dimension(ds,target_var): vert_dim = dim return vert_dim - def create_tmp_dir(outdir): ''' creates a tmp_dir based on targeted output directory root. returns the name of the tmp dir. @@ -145,20 +239,20 @@ def create_tmp_dir(outdir): file output. tmp_dir will be slightly different depending on the output directory targeted ''' - print(f"(cmorize_target_var_files) outdir = {outdir}") + print(f"(create_tmp_dir) outdir = {outdir}") tmp_dir = None if any( [ outdir == "/local2", outdir.find("/work") != -1, outdir.find("/net" ) != -1 ] ): - print(f'(cmorize_target_var_files) using /local /work /net ( tmp_dir = {outdir}/ )') + print(f'(create_tmp_dir) using /local /work /net ( tmp_dir = {outdir}/ )') tmp_dir = str( Path("{outdir}/").resolve() ) else: - print('(cmorize_target_var_files) NOT using /local /work /net (tmp_dir = outdir/tmp/ )') + print(f'(create_tmp_dir) NOT using /local /work /net (tmp_dir = {outdir}/tmp/ )') tmp_dir = str( Path(f"{outdir}/tmp/").resolve() ) try: os.makedirs(tmp_dir, exist_ok=True) except Exception as exc: - raise OSError('problem creating temp output directory. stop.') from exc + raise OSError('(create_tmp_dir) problem creating temp output directory. stop.') from exc return tmp_dir @@ -174,8 +268,8 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, ''' print('\n\n-------------------------- START rewrite_netcdf_file_var call -----') print( "(rewrite_netcdf_file_var) input data: " ) - print(f"(rewrite_netcdf_file_var) local_var = {local_var}" ) - print(f"(rewrite_netcdf_file_var) target_var = {target_var}") + print(f" local_var = {local_var}" ) + print(f" target_var = {target_var}") # open the input file @@ -213,24 +307,23 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print( "(rewrite_netcdf_file_var) WARNING grabbing time_bnds didnt work... moving on") - - - - # read the input... units? 
+ # read the input variable data, i believe var = ds[target_var][:] - # determine the vertical dimension by looping over netcdf variables - vert_dim = get_vertical_dimension(ds,target_var) #0#vert_dim = None + vert_dim = get_vertical_dimension(ds, target_var) print(f"(rewrite_netcdf_file_var) Vertical dimension of {target_var}: {vert_dim}") - - # Check var_dim, vert_dim + # grab var_dim var_dim = len(var.shape) + print(f"(rewrite_netcdf_file_var) var_dim = {var_dim}, local_var = {local_var}") + + # Check var_dim if var_dim not in [3, 4]: raise ValueError(f"var_dim == {var_dim} != 3 nor 4. stop.") - # check for vert_dim error condition. if pass, assign lev for later use. + # Check var_dim and vert_dim and assign lev if relevant. + # error if vert_dim wrong given var_dim lev = None if var_dim == 4: if vert_dim not in [ "plev30", "plev19", "plev8", @@ -238,9 +331,6 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, raise ValueError(f'var_dim={var_dim}, vert_dim = {vert_dim} is not supported') lev = ds[vert_dim] - print(f"(rewrite_netcdf_file_var) var_dim = {var_dim}, local_var = {local_var}") - - # now we set up the cmor module object # initialize CMOR @@ -260,7 +350,6 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print(f"(rewrite_netcdf_file_var) cmor is opening json_table_config = {json_table_config}") cmor.load_table(json_table_config) - #units = proj_table_vars["variable_entry"] [local_var] ["units"] units = proj_table_vars["variable_entry"] [target_var] ["units"] print(f"(rewrite_netcdf_file_var) units={units}") @@ -268,15 +357,14 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, cmor_lon = cmor.axis("longitude", coord_vals = lon, cell_bounds = lon_bnds, units = "degrees_E") try: print( f"(rewrite_netcdf_file_var) Executing cmor.axis('time', \n" - f"(rewrite_netcdf_file_var) coord_vals = \n{time_coords}, \n" - f"(rewrite_netcdf_file_var) cell_bounds = time_bnds, units = {time_coord_units}) ") + f" coord_vals = \n{time_coords}, \n" + f" 
cell_bounds = time_bnds, units = {time_coord_units}) ") cmor_time = cmor.axis("time", coord_vals = time_coords, cell_bounds = time_bnds, units = time_coord_units) - #cmor_time = cmor.axis("time", coord_vals = time_coords, units = time_coord_units) except ValueError as exc: - print(f"(rewrite_netcdf_file_var) WARNING exception raised... exc={exc}") - print( "(rewrite_netcdf_file_var) cmor_time = cmor.axis('time', " - "coord_vals = time_coords, units = time_coord_units)") + print(f"(rewrite_netcdf_file_var) WARNING exception raised... exc={exc}\n" + " cmor_time = cmor.axis('time', \n" + " coord_vals = time_coords, units = time_coord_units)") cmor_time = cmor.axis("time", coord_vals = time_coords, units = time_coord_units) # initializations @@ -336,8 +424,8 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, units = lev.units, cell_bounds = ds[vert_dim+"_bnds"] ) - print(f'(rewrite_netcdf_file_var) ierr_ap after calling cmor_zfactor: {ierr_ap}') - print(f'(rewrite_netcdf_file_var) ierr_b after calling cmor_zfactor: {ierr_b}') + print(f'(rewrite_netcdf_file_var) ierr_ap after calling cmor_zfactor: {ierr_ap}\n' + f'(rewrite_netcdf_file_var) ierr_b after calling cmor_zfactor: {ierr_b}' ) ips = cmor.zfactor( zaxis_id = cmor_lev, zfactor_name = "ps", axis_ids = [cmor_time, cmor_lat, cmor_lon], @@ -359,9 +447,9 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, cmor.write(cmor_var, var) if save_ps: if any( [ ips is None, ps is None ] ): - print( 'WARNING: ps or ips is None!, but save_ps is True!') - print(f'ps = {ps}, ips = {ips}') - print( 'skipping ps writing!') + print( '(rewrite_netcdf_file_var) WARNING: ps or ips is None!, but save_ps is True!\n' + f' ps = {ps}, ips = {ips}\n' + ' skipping ps writing!' 
) else: cmor.write(ips, ps, store_with = cmor_var) cmor.close(ips, file_name = True, preserve = False) @@ -398,10 +486,10 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, ''' print('\n\n-------------------------- START cmorize_target_var_files call -----') - print(f"(cmorize_target_var_files) local_var = {local_var} to be used for file-targeting.") - print(f"(cmorize_target_var_files) target_var = {target_var} to be used for reading the data " - "from the file") - print(f"(cmorize_target_var_files) outdir = {outdir}") + print(f"(cmorize_target_var_files) local_var = {local_var} to be used for file-targeting.\n" + f" target_var = {target_var} to be used for reading the data \n" + " from the file\n" + f" outdir = {outdir}") #determine a tmp dir for working on files. @@ -413,10 +501,7 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, nc_fls = {} for i, iso_datetime in enumerate(iso_datetime_arr): - print("\n\n==== REEXAMINE THIS file movement ====================================") - # why is nc_fls a filled list/array/object thingy here? 
see above line - #nc_fls[i] = f"{indir}/{name_of_set}.{iso_datetime}.{target_var}.nc" nc_fls[i] = f"{indir}/{name_of_set}.{iso_datetime}.{local_var}.nc" print(f"(cmorize_target_var_files) input file = {nc_fls[i]}") if not Path(nc_fls[i]).exists(): @@ -425,9 +510,8 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, # create a copy of the input file with local var name into the work directory - #nc_file_work = f"{tmp_dir}{name_of_set}.{iso_datetime}.{target_var}.nc" nc_file_work = f"{tmp_dir}{name_of_set}.{iso_datetime}.{local_var}.nc" - + print(f"(cmorize_target_var_files) nc_file_work = {nc_file_work}") copy_nc( nc_fls[i], nc_file_work) @@ -437,10 +521,10 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, if Path(nc_ps_file).exists(): print(f"(cmorize_target_var_files) nc_ps_file_work = {nc_ps_file_work}") copy_nc(nc_ps_file, nc_ps_file_work) - print("\n\n==== REEXAMINE THIS file movement ====================================") - # now we have a file in our targets, point CMOR to the configs and the input file(s) + # TODO think of better way to write this kind of conditional data movement... 
+ # now we have a file in our targets, point CMOR to the configs and the input file(s) make_cmor_write_here = None print( Path( tmp_dir ) ) print( Path( os.getcwd() ) ) @@ -451,10 +535,10 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, print(f'tmp_dir is relative to CWD!') make_cmor_write_here = os.getcwd() + '/'+tmp_dir # unavoidable, cmor module FORCES write to CWD assert make_cmor_write_here is not None - + gotta_go_back_here=os.getcwd() try: - print(f"cd'ing to \n {make_cmor_write_here}" ) + print(f"cd'ing to \n {make_cmor_write_here}" ) os.chdir( make_cmor_write_here ) except: raise OSError(f'could not chdir to {make_cmor_write_here}') @@ -466,7 +550,7 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, target_var , json_exp_config , json_table_config ) - os.chdir(gotta_go_back_here) + os.chdir( gotta_go_back_here ) # now that CMOR has rewritten things... we can take our post-rewriting actions @@ -487,9 +571,9 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, # hmm.... this is making issues for pytest mv_cmd = f"mv {tmp_dir}/{local_file_name} {filedir}" print(f"(cmorize_target_var_files) moving files...\n {mv_cmd}") - subprocess.run(mv_cmd, shell=True, check=True) + subprocess.run(mv_cmd, shell = True, check = True) - # ------ refactor this into function? TODO + # ------ refactor this into function? #TODO # ------ what is the use case for this logic really?? 
filename_no_nc = filename[:filename.rfind(".nc")] chunk_str = filename_no_nc[-6:] @@ -499,7 +583,7 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, filename_corr = "{filename[:filename.rfind('.nc')]}_{iso_datetime}.nc" mv_cmd = f"mv {filename} {filename_corr}" print(f"(cmorize_target_var_files) moving files, strange chunkstr logic...\n {mv_cmd}") - subprocess.run(mv_cmd, shell=True, check=True) + subprocess.run(mv_cmd, shell = True, check = True) # ------ end refactor this into function? # delete files in work dirs @@ -535,18 +619,19 @@ def cmor_run_subtool( indir = None, json_table_config: json file containing CMIP-compliant per-variable/metadata for specific MIP table. The MIP table can generally be identified by the specific filename (e.g. "Omon") - json_exp_config: json file containing other configuration details (FILL IN TO DO #TODO) + json_exp_config: json file containing metadata dictionary for CMORization. this metadata is effectively + appended to the final output file's header outdir: string, directory root that will contain the full output and output directory structure generated by the cmor module upon request. opt_var_name: string, optional, specify a variable name to specifically process only filenames matching that variable name. I.e., this string help target local_vars, not target_vars. 
''' - print(locals()) + # check req'd inputs if None in [indir, json_var_list, json_table_config, json_exp_config, outdir]: - raise ValueError(f'all input arguments are required!\n' - '[indir, json_var_list, json_table_config, json_exp_config, outdir] = \n' - f'[{indir}, {json_var_list}, {json_table_config}, ' - '{json_exp_config}, {outdir}]' ) + raise ValueError(f'(cmor_run_subtool) all input arguments except opt_var_name are required!\n' + ' [indir, json_var_list, json_table_config, json_exp_config, outdir] = \n' + f' [{indir}, {json_var_list}, {json_table_config}, ' + ' {json_exp_config}, {outdir}]' ) # open CMOR table config file print( '(cmor_run_subtool) getting table variables from json_table_config = \n' @@ -554,7 +639,7 @@ def cmor_run_subtool( indir = None, try: with open( json_table_config, "r", encoding = "utf-8") as table_config_file: proj_table_vars=json.load(table_config_file) - + except Exception as exc: raise FileNotFoundError( f'ERROR: json_table_config file cannot be opened.\n' @@ -574,37 +659,37 @@ def cmor_run_subtool( indir = None, f'ERROR: json_var_list file cannot be opened.\n' f' json_var_list = {json_var_list}' ) from exc - # make sure the exp config exists too while we're at it... + # make sure the exp config exists too while we're at it... if Path(json_exp_config).exists(): # if so, resolve to absolute path json_exp_config = str( Path( json_exp_config).resolve() ) else: raise FileNotFoundError( f'ERROR: json_exp_config file cannot be opened.\n' f' json_exp_config = {json_exp_config}' ) - + # loop over entries in the json_var_list, read into var_list for local_var in var_list: # if its not in the table configurations variable_entry list, skip if var_list[local_var] not in proj_table_vars["variable_entry"]: print(f"(cmor_run_subtool) WARNING: skipping local_var = {local_var} /\n" - f" target_var = {var_list[local_var]}") - print( "(cmor_run_subtool) ... 
target_var not found in CMOR variable group") + f" target_var = {var_list[local_var]}\n" + " ... target_var not found in CMOR variable group") continue if all( [ opt_var_name is not None, local_var != opt_var_name ] ): - print(f'(cmor_run_subtool) WARNING: skipping local_var={local_var} as it is not equal' + print(f'(cmor_run_subtool) WARNING: skipping local_var={local_var} as it is not equal\n' ' to the opt_var_name argument.') continue - + # it is in there, get the name of the data inside the netcdf file. target_var=var_list[local_var] # often equiv to local_var but not necessarily. if local_var != target_var: - print(f'(cmor_run_subtool) WARNING: local_var == {local_var} ' - f'!= {target_var} == target_var') - print(f'i am expecting {local_var} to be in the filename, and i expect the variable' - f' in that file to be {target_var}') + print(f'(cmor_run_subtool) WARNING: local_var == {local_var} \n' + f' != {target_var} == target_var\n' + f' i am expecting {local_var} to be in the filename, and i expect the variable\n' + f' in that file to be {target_var}') # examine input directory to obtain a list of input file targets From 4b313384a8c0cda3bb4eab2d398d99d3ada8a93c Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Fri, 25 Oct 2024 13:14:09 -0400 Subject: [PATCH 05/12] alrightly, lets move some of this hardcoding to the top of the file, see how it goes... --- fre/cmor/cmor_mixer.py | 206 +++++++++++++++-------------------------- 1 file changed, 77 insertions(+), 129 deletions(-) diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index 65a71469..6650238b 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -14,50 +14,6 @@ import click import cmor -# ------ \start assumptions / potential future configuration thingies. -# GLOBAL hardcoded assumption: netcdf files always have an ending of '.nc' -# many implicit assumptions regarding the presence of metadata in the input netcdf file name -# e.g. 
datetime, variable name, name_of_set are all assumed to be at particular spots -# utf8 encoding -# MINOR: opening netcdf files in append or write or read -# MINOR: key names in certain input configuration files- these are tightly controlled -# -# for check_dataset_for_ocean_grids: -# input reading/checking hardcode - dataset doesn't have a variable named 'xh' -# for get_vertical_dimension: -# input reading/checking hardcode - dataset has dimension/axis name 'Z' -# for create_tmp_dir: -# input reading/checking hardcode - check output directory for specific drives local2, work, net -# output moving/writing hardcode - tmpdir name is 'tmp' if condition met -# -# for rewrite_netcdf_file_var: -# input reading/checking hardcode - dimensions named 'lat', 'lon', 'time' -# input reading/checking hardcode - "bounds" for above, named 'lat_bnds', 'lon_bnds', 'time_bnds' -# input reading/checking hardcode - check that var_dim is 3 or 4 -# input reading/checking hardcode - check that var_dim is 3 --> simple 3 dim subcase -# input reading/checking hardcode - if var_dim is 4, vert_dim must be one of the following: -# "plev30", "plev19", "plev8","height2m", "level", "lev", "levhalf" -# input reading/checking hardcode - then subcases are relevant as follows: -# if vert_dim in ["plev30", "plev19", "plev8", "height2m"] --> SUBCASE -# elif vert_dim in ["level", "lev", "levhalf"] --> DISTINCT SUBCASE -# pressure input file is tagged with 'ps' potentially nearby -# sub_sub_case --> if vert_dim is lev_half -# input ds has zfactor values as 'ap_bnds', 'b_bnds' -# output moving/writing hardcode - output zfactors have names "ap_half", "b_half", -# output vertical level axis name "alternate_hybrid_sigma_half" -# sub_sub_case --> else -# input ds has zfactor values as 'ap' and 'b', and zfactor bnds as 'ap_bnds', 'b_bnds' -# output moving/writing hardcode - output zfactors have names "ap", "b", -# output vertical level axis name "alternate_hybrid_sigma" -# output moving/writing hardcode - output 
interpolated pressures have name "ps", units "Pa" -# output moving/writing hardcode - cmor setup parameters -# output moving/writing hardcode - lat/lon axies named "latitude"/"longitude" with units "degrees_N" "degrees_E" -# -# for cmorize_target_var_files: -# input reading/checking hardcode - pressure input file is tagged with 'ps' potentially nearby -# output moving/writing hardcode - pressure out file is named with 'ps' -# - # ----- \start consts @@ -65,15 +21,10 @@ # INPUT_READ_PS_FILE_VAR_NAME = 'ps' -INPUT_READDIR_NAME_CHECKS = [ [ 'contains', '/work'], - ['contains', '/net'], - ['equal', '/local2'] ] - -# OUTPUT_TEMP_DIR_NAME = 'tmp' # -INPUT_READ_OCEAN_GRID_VAR_NAME = 'xh' +INPUT_READ_TAG_OCEAN_GRID_VAR = 'xh' INPUT_READ_Z_AXIS_NAME = 'Z' # @@ -85,36 +36,26 @@ INPUT_READ_TIME_BNDS = 'time_bnds' # -INPUT_ACCEPT_VAR_DIMS = [3,4] -INPUT_ACCEPT_VERT_DIMS = ["plev30", "plev19", "plev8","height2m", "level", "lev", "levhalf"] - -# ---- -#INPUT_SUBCASE1_VAR_VERT_DIMS = { "4": [ "plev30", "plev19", "plev8", "height2m" ] } - -# ---- -INPUT_SUBCASE2_VAR_VERT_DIMS = { "4": [ "level", "lev", "levhalf" ] } -#- -OUTPUT_SUBCASE2_PS_VAR_NAME = 'ps' -OUTPUT_SUBCASE2_PS_VAR_UNIT = 'Pa' - -# --- --- -INPUT_SUBCASE2_0_VAR_VERT_DIMS = { "4": [ "levhalf" ] } -INPUT_SUBCASE2_0_ZFACT_VALS = ['ap_bnds','b_bnds'] -#- -OUTPUT_SUBCASE2_0_ZFACT_VAL_NAMES = ['ap_half','b_half'] -OUTPUT_SUBCASE2_0_VERT_LVL_NAME = 'altername_hybrid_sigma_half' - -# --- --- -INPUT_SUBCASE2_1_VAR_VERT_DIMS = { "4": [ "level", "lev" ] } -INPUT_SUBCASE2_1_ZFACT_VALS = ['ap','b'] -INPUT_SUBCASE2_1_ZFACT_BNDS = ['ap_bnds','b_bnds'] -#- -OUTPUT_SUBCASE2_0_ZFACT_VAL_NAMES = ['ap','b'] -OUTPUT_SUBCASE2_0_VERT_LVL_NAME = 'altername_hybrid_sigma' +INPUT_READ_AP_ZFACTS = ['ap','ap_bnds'] +INPUT_READ_B_ZFACTS = ['b','b_bnds'] +# +OUTPUT_WRITE_AXIS_LEVELS_HALF_NAME="alternate_hybrid_sigma" +OUTPUT_WRITE_AP_ZFACTS_NAME = "ap" +OUTPUT_WRITE_B_ZFACTS_NAME = "b" +# 
+OUTPUT_WRITE_AXIS_LEVELS_HALF_NAME="alternate_hybrid_sigma_half" +OUTPUT_WRITE_AP_ZFACTS_HALF_NAME = "ap_half" +OUTPUT_WRITE_B_ZFACTS_HALF_NAME = "b_half" +# +INPUT_ACCEPT_VAR_DIMS = [3,4] +INPUT_ACCEPT_VERT_DIMS = ['plev30', 'plev19', 'plev8','height2m', 'level', 'lev', 'levhalf'] +# +OUTPUT_PS_VAR_NAME = 'ps' +OUTPUT_PS_VAR_UNIT = 'Pa' # ----- \end consts ### ------ helper functions ------ ### @@ -135,7 +76,7 @@ def copy_nc(in_nc, out_nc): # note- totally infuriating... # the correct value for the format arg is netCDF4.Dataset.data_model # and NOT netCDF4.Dataset.disk_format - dsout = nc.Dataset(out_nc, "w", + dsout = nc.Dataset(out_nc, 'w', format = dsin.data_model) #Copy dimensions @@ -190,10 +131,10 @@ def get_iso_datetimes(var_filenames, iso_datetime_arr = None): if iso_datetime_arr is None: iso_datetime_arr = [] for filename in var_filenames: - iso_datetime = filename.split(".")[1] + iso_datetime = filename.split('.')[1] if iso_datetime not in iso_datetime_arr: iso_datetime_arr.append( - filename.split(".")[1] ) + filename.split('.')[1] ) iso_datetime_arr.sort() #print(f"(get_iso_datetimes) Available dates: {iso_datetime_arr}") if len(iso_datetime_arr) < 1: @@ -205,9 +146,10 @@ def check_dataset_for_ocean_grid(ds): one argument. this function has no return. ds: netCDF4.Dataset object containing variables with associated dimensional information. ''' - if "xh" in list(ds.variables.keys()): + if INPUT_READ_TAG_OCEAN_GRID_VAR in list(ds.variables.keys()): raise NotImplementedError( - "(check_dataset_for_ocean_grid) 'xh' found in var_list. ocean grid req'd but not yet unimplemented. stop.") + "(check_dataset_for_ocean_grid) {INPUT_READ_TAG_OCEAN_GRID_VAR} " + "found in var_list. ocean grid req'd but not yet unimplemented. stop.") def get_vertical_dimension(ds,target_var): @@ -226,7 +168,7 @@ def get_vertical_dimension(ds,target_var): dims = variable.dimensions for dim in dims: # if it is not a vertical axis, move on. 
- if not (ds[dim].axis and ds[dim].axis == "Z"): + if not (ds[dim].axis and ds[dim].axis == INPUT_READ_Z_AXIS_NAME): continue vert_dim = dim return vert_dim @@ -241,14 +183,14 @@ def create_tmp_dir(outdir): ''' print(f"(create_tmp_dir) outdir = {outdir}") tmp_dir = None - if any( [ outdir == "/local2", - outdir.find("/work") != -1, - outdir.find("/net" ) != -1 ] ): + if any( [ outdir == '/local2', + outdir.find('/work') != -1, + outdir.find('/net' ) != -1 ] ): print(f'(create_tmp_dir) using /local /work /net ( tmp_dir = {outdir}/ )') - tmp_dir = str( Path("{outdir}/").resolve() ) + tmp_dir = str( Path(f"{outdir}/").resolve() ) + '/' else: - print(f'(create_tmp_dir) NOT using /local /work /net (tmp_dir = {outdir}/tmp/ )') - tmp_dir = str( Path(f"{outdir}/tmp/").resolve() ) + print(f'(create_tmp_dir) NOT using /local /work /net (tmp_dir = {outdir}/{OUTPUT_TEMP_DIR_NAME}/ )') + tmp_dir = str( Path(f"{outdir}/{OUTPUT_TMP_DIR_NAME}/").resolve() ) + '/' try: os.makedirs(tmp_dir, exist_ok=True) except Exception as exc: @@ -264,12 +206,22 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, target_var = None, json_exp_config = None, json_table_config = None):#, tmp_dir = None ): - ''' rewrite the input netcdf file nc_fl containing target_var in a CMIP-compliant manner. + ''' + rewrite the input netcdf file nc_fl containing target_var in a CMIP-compliant manner. + accepts six arguments, all required: + proj_table_vars: json dictionary object, variable table read from json_table_config. + local_var: string, variable name used for finding files locally containing target_var, + this argument is often equal to target_var. + netcdf_file: string, representing path to intput netcdf file. + target_var: string, representing the variable name attached to the data object in the netcdf file. 
+ json_exp_config: string, representing path to json configuration file holding metadata for appending to output + this argument is most used for making sure the right grid label is getting attached to the right output + json_table_config: string, representing path to json configuration file holding variable names for a given table. + proj_table_vars is read from this file, but both are passed anyways. ''' - print('\n\n-------------------------- START rewrite_netcdf_file_var call -----') - print( "(rewrite_netcdf_file_var) input data: " ) - print(f" local_var = {local_var}" ) - print(f" target_var = {target_var}") + print( "(rewrite_netcdf_file_var) input data: \n" + f" local_var = {local_var}\n" + f" target_var = {target_var}") # open the input file @@ -285,23 +237,20 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, # figure out the dimension names programmatically TODO # Define lat and lon dimensions # Assume input file is lat/lon grid - lat = ds["lat"][:] - lon = ds["lon"][:] - lat_bnds = ds["lat_bnds"][:] - lon_bnds = ds["lon_bnds"][:] - - ## Define time - #time = ds["time"][:] + lat = ds[INPUT_READ_LAT_DIM ][:] + lon = ds[INPUT_READ_LON_DIM ][:] + lat_bnds = ds[INPUT_READ_LAT_BNDS][:] + lon_bnds = ds[INPUT_READ_LON_BNDS][:] # read in time_coords + units - time_coords = ds["time"][:] - time_coord_units = ds["time"].units + time_coords = ds[INPUT_READ_TIME_DIM][:] + time_coord_units = ds[INPUT_READ_TIME_DIM].units print(f"(rewrite_netcdf_file_var) time_coord_units = {time_coord_units}") # read in time_bnds , if present time_bnds = [] try: - time_bnds = ds["time_bnds"][:] + time_bnds = ds[INPUT_READ_TIME_BNDS][:] #print(f"(rewrite_netcdf_file_var) time_bnds = {time_bnds}") except ValueError: print( "(rewrite_netcdf_file_var) WARNING grabbing time_bnds didnt work... 
moving on") @@ -319,15 +268,14 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print(f"(rewrite_netcdf_file_var) var_dim = {var_dim}, local_var = {local_var}") # Check var_dim - if var_dim not in [3, 4]: - raise ValueError(f"var_dim == {var_dim} != 3 nor 4. stop.") + if var_dim not in INPUT_ACCEPT_VAR_DIMS: + raise ValueError(f"var_dim is not in {INPUT_ACCEPT_VAR_DIMS}...\n stop.\n") # Check var_dim and vert_dim and assign lev if relevant. # error if vert_dim wrong given var_dim lev = None if var_dim == 4: - if vert_dim not in [ "plev30", "plev19", "plev8", - "height2m", "level", "lev", "levhalf"] : + if vert_dim not in INPUT_ACCEPT_VERT_DIMS: raise ValueError(f'var_dim={var_dim}, vert_dim = {vert_dim} is not supported') lev = ds[vert_dim] @@ -386,40 +334,40 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, elif vert_dim in ["level", "lev", "levhalf"]: # find the ps file nearby - ps_file = netcdf_file.replace(f'.{target_var}.nc', '.ps.nc') + ps_file = netcdf_file.replace(f'.{target_var}.nc', f'.{INPUT_READ_PS_FILE_VAR_NAME}.nc') ds_ps = nc.Dataset(ps_file) - ps = ds_ps['ps'][:].copy() + ps = ds_ps[INPUT_READ_PS_FILE_VAR_NAME][:].copy() ds_ps.close() # assign lev_half specifics if vert_dim == "lev_half": ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = "ap_half", + zfactor_name = OUTPUT_WRITE_AP_ZFACTS_HALF_NAME, axis_ids = [cmor_lev, ], - zfactor_values = ds["ap_bnds"][:], - units = ds["ap_bnds"].units ) + zfactor_values = ds[ INPUT_READ_AP_ZFACTS[1] ][:], + units = ds[ INPUT_READ_AP_ZFACTS[1] ].units ) ierr_b = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = "b_half", + zfactor_name = OUTPUT_WRITE_B_ZFACTS_HALF_NAME, axis_ids = [cmor_lev, ], - zfactor_values = ds["b_bnds"][:], - units = ds["b_bnds"].units ) - cmor_lev = cmor.axis( "alternate_hybrid_sigma_half", + zfactor_values = ds[ INPUT_READ_B_ZFACTS[1] ][:], + units = ds[ INPUT_READ_B_ZFACTS[1] ].units ) + cmor_lev = cmor.axis( OUTPUT_WRITE_AXIS_LEVELS_HALF_NAME, coord_vals = 
lev[:], units = lev.units ) else: ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = "ap", + zfactor_name = OUTPUT_WRITE_AP_ZFACTS_NAME, axis_ids = [cmor_lev, ], - zfactor_values = ds["ap"][:], - zfactor_bounds = ds["ap_bnds"][:], - units = ds["ap"].units ) + zfactor_values = ds[ INPUT_READ_AP_ZFACTS[0] ][:], + zfactor_bounds = ds[ INPUT_READ_AP_ZFACTS[1] ][:], + units = ds[ INPUT_READ_AP_ZFACTS[0] ].units ) ierr_b = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = "b", + zfactor_name = OUTPUT_WRITE_ZFACTS_NAME, axis_ids = [cmor_lev, ], - zfactor_values = ds["b"][:], - zfactor_bounds = ds["b_bnds"][:], - units = ds["b"].units ) - cmor_lev = cmor.axis( "alternate_hybrid_sigma", + zfactor_values = ds[ INPUT_READ_b_ZFACTS[0] ][:], + zfactor_bounds = ds[ INPUT_READ_b_ZFACTS[1] ][:], + units = ds[ INPUT_READ_b_ZFACTS[0] ].units ) + cmor_lev = cmor.axis( OUTPUT_WRITE_AXIS_LEVELS_NAME, coord_vals = lev[:], units = lev.units, cell_bounds = ds[vert_dim+"_bnds"] ) @@ -427,9 +375,9 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print(f'(rewrite_netcdf_file_var) ierr_ap after calling cmor_zfactor: {ierr_ap}\n' f'(rewrite_netcdf_file_var) ierr_b after calling cmor_zfactor: {ierr_b}' ) ips = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = "ps", + zfactor_name = OUTPUT_PS_VAR_NAME, axis_ids = [cmor_time, cmor_lat, cmor_lon], - units = "Pa" ) + units = OUTPUT_PS_VAR_UNIT ) save_ps = True # assign axes at end of 4-dim case axes = [cmor_time, cmor_lev, cmor_lat, cmor_lon] @@ -516,8 +464,8 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, copy_nc( nc_fls[i], nc_file_work) # if the ps file exists, we'll copy it to the work directory too - nc_ps_file = nc_fls[i].replace(f'.{local_var}.nc', '.ps.nc') - nc_ps_file_work = nc_file_work.replace(f'.{local_var}.nc', '.ps.nc') + nc_ps_file = nc_fls[i].replace(f'.{local_var}.nc', f'.{INPUT_READ_PS_FILE_VAR_NAME}.nc') + nc_ps_file_work = nc_file_work.replace(f'.{local_var}.nc', 
f'.{INPUT_READ_PS_FILE_VAR_NAME}.nc') if Path(nc_ps_file).exists(): print(f"(cmorize_target_var_files) nc_ps_file_work = {nc_ps_file_work}") copy_nc(nc_ps_file, nc_ps_file_work) From 31996b08b3c2ab3c5510a86606c01ef305383a55 Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Fri, 25 Oct 2024 13:26:41 -0400 Subject: [PATCH 06/12] fix undefined variable name errors --- fre/cmor/cmor_mixer.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index 6650238b..d530b1df 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -40,7 +40,7 @@ INPUT_READ_B_ZFACTS = ['b','b_bnds'] # -OUTPUT_WRITE_AXIS_LEVELS_HALF_NAME="alternate_hybrid_sigma" +OUTPUT_WRITE_AXIS_LEVELS_NAME="alternate_hybrid_sigma" OUTPUT_WRITE_AP_ZFACTS_NAME = "ap" OUTPUT_WRITE_B_ZFACTS_NAME = "b" @@ -190,7 +190,7 @@ def create_tmp_dir(outdir): tmp_dir = str( Path(f"{outdir}/").resolve() ) + '/' else: print(f'(create_tmp_dir) NOT using /local /work /net (tmp_dir = {outdir}/{OUTPUT_TEMP_DIR_NAME}/ )') - tmp_dir = str( Path(f"{outdir}/{OUTPUT_TMP_DIR_NAME}/").resolve() ) + '/' + tmp_dir = str( Path(f"{outdir}/{OUTPUT_TEMP_DIR_NAME}/").resolve() ) + '/' try: os.makedirs(tmp_dir, exist_ok=True) except Exception as exc: @@ -206,7 +206,7 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, target_var = None, json_exp_config = None, json_table_config = None):#, tmp_dir = None ): - ''' + ''' rewrite the input netcdf file nc_fl containing target_var in a CMIP-compliant manner. accepts six arguments, all required: proj_table_vars: json dictionary object, variable table read from json_table_config. 
@@ -217,10 +217,10 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, json_exp_config: string, representing path to json configuration file holding metadata for appending to output this argument is most used for making sure the right grid label is getting attached to the right output json_table_config: string, representing path to json configuration file holding variable names for a given table. - proj_table_vars is read from this file, but both are passed anyways. + proj_table_vars is read from this file, but both are passed anyways. ''' - print( "(rewrite_netcdf_file_var) input data: \n" - f" local_var = {local_var}\n" + print( "(rewrite_netcdf_file_var) input data: \n" + f" local_var = {local_var}\n" f" target_var = {target_var}") @@ -362,11 +362,11 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, zfactor_bounds = ds[ INPUT_READ_AP_ZFACTS[1] ][:], units = ds[ INPUT_READ_AP_ZFACTS[0] ].units ) ierr_b = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = OUTPUT_WRITE_ZFACTS_NAME, + zfactor_name = OUTPUT_WRITE_B_ZFACTS_NAME, axis_ids = [cmor_lev, ], - zfactor_values = ds[ INPUT_READ_b_ZFACTS[0] ][:], - zfactor_bounds = ds[ INPUT_READ_b_ZFACTS[1] ][:], - units = ds[ INPUT_READ_b_ZFACTS[0] ].units ) + zfactor_values = ds[ INPUT_READ_B_ZFACTS[0] ][:], + zfactor_bounds = ds[ INPUT_READ_B_ZFACTS[1] ][:], + units = ds[ INPUT_READ_B_ZFACTS[0] ].units ) cmor_lev = cmor.axis( OUTPUT_WRITE_AXIS_LEVELS_NAME, coord_vals = lev[:], units = lev.units, @@ -477,10 +477,10 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, print( Path( tmp_dir ) ) print( Path( os.getcwd() ) ) if Path( tmp_dir ).is_absolute(): - print(f'tmp_dir is absolute') + print('tmp_dir is absolute') make_cmor_write_here = tmp_dir elif Path( tmp_dir ).exists(): # relative to where we are - print(f'tmp_dir is relative to CWD!') + print('tmp_dir is relative to CWD!') make_cmor_write_here = os.getcwd() + '/'+tmp_dir # unavoidable, cmor module FORCES write to CWD assert 
make_cmor_write_here is not None @@ -542,8 +542,8 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, Path(nc_ps_file_work).unlink() if DEBUG_MODE_RUN_ONE: - print(f'WARNING: DEBUG_MODE_RUN_ONE is True!!!!') - print(f'WARNING: done processing one file!!!') + print('WARNING: DEBUG_MODE_RUN_ONE is True!!!!') + print('WARNING: done processing one file!!!') break @@ -667,7 +667,7 @@ def cmor_run_subtool( indir = None, ) if DEBUG_MODE_RUN_ONE: - print(f'WARNING: DEBUG_MODE_RUN_ONE is True. breaking var_list loop') + print('WARNING: DEBUG_MODE_RUN_ONE is True. breaking var_list loop') break From ff6de5b7ef12b3285b57c11738d38c8c85ea583d Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Mon, 28 Oct 2024 15:22:57 -0400 Subject: [PATCH 07/12] Revert "fix undefined variable name errors" This reverts commit 31996b08b3c2ab3c5510a86606c01ef305383a55. --- fre/cmor/cmor_mixer.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index d530b1df..6650238b 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -40,7 +40,7 @@ INPUT_READ_B_ZFACTS = ['b','b_bnds'] # -OUTPUT_WRITE_AXIS_LEVELS_NAME="alternate_hybrid_sigma" +OUTPUT_WRITE_AXIS_LEVELS_HALF_NAME="alternate_hybrid_sigma" OUTPUT_WRITE_AP_ZFACTS_NAME = "ap" OUTPUT_WRITE_B_ZFACTS_NAME = "b" @@ -190,7 +190,7 @@ def create_tmp_dir(outdir): tmp_dir = str( Path(f"{outdir}/").resolve() ) + '/' else: print(f'(create_tmp_dir) NOT using /local /work /net (tmp_dir = {outdir}/{OUTPUT_TEMP_DIR_NAME}/ )') - tmp_dir = str( Path(f"{outdir}/{OUTPUT_TEMP_DIR_NAME}/").resolve() ) + '/' + tmp_dir = str( Path(f"{outdir}/{OUTPUT_TMP_DIR_NAME}/").resolve() ) + '/' try: os.makedirs(tmp_dir, exist_ok=True) except Exception as exc: @@ -206,7 +206,7 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, target_var = None, json_exp_config = None, json_table_config = None):#, tmp_dir = None ): - ''' + ''' rewrite 
the input netcdf file nc_fl containing target_var in a CMIP-compliant manner. accepts six arguments, all required: proj_table_vars: json dictionary object, variable table read from json_table_config. @@ -217,10 +217,10 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, json_exp_config: string, representing path to json configuration file holding metadata for appending to output this argument is most used for making sure the right grid label is getting attached to the right output json_table_config: string, representing path to json configuration file holding variable names for a given table. - proj_table_vars is read from this file, but both are passed anyways. + proj_table_vars is read from this file, but both are passed anyways. ''' - print( "(rewrite_netcdf_file_var) input data: \n" - f" local_var = {local_var}\n" + print( "(rewrite_netcdf_file_var) input data: \n" + f" local_var = {local_var}\n" f" target_var = {target_var}") @@ -362,11 +362,11 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, zfactor_bounds = ds[ INPUT_READ_AP_ZFACTS[1] ][:], units = ds[ INPUT_READ_AP_ZFACTS[0] ].units ) ierr_b = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = OUTPUT_WRITE_B_ZFACTS_NAME, + zfactor_name = OUTPUT_WRITE_ZFACTS_NAME, axis_ids = [cmor_lev, ], - zfactor_values = ds[ INPUT_READ_B_ZFACTS[0] ][:], - zfactor_bounds = ds[ INPUT_READ_B_ZFACTS[1] ][:], - units = ds[ INPUT_READ_B_ZFACTS[0] ].units ) + zfactor_values = ds[ INPUT_READ_b_ZFACTS[0] ][:], + zfactor_bounds = ds[ INPUT_READ_b_ZFACTS[1] ][:], + units = ds[ INPUT_READ_b_ZFACTS[0] ].units ) cmor_lev = cmor.axis( OUTPUT_WRITE_AXIS_LEVELS_NAME, coord_vals = lev[:], units = lev.units, @@ -477,10 +477,10 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, print( Path( tmp_dir ) ) print( Path( os.getcwd() ) ) if Path( tmp_dir ).is_absolute(): - print('tmp_dir is absolute') + print(f'tmp_dir is absolute') make_cmor_write_here = tmp_dir elif Path( tmp_dir ).exists(): # 
relative to where we are - print('tmp_dir is relative to CWD!') + print(f'tmp_dir is relative to CWD!') make_cmor_write_here = os.getcwd() + '/'+tmp_dir # unavoidable, cmor module FORCES write to CWD assert make_cmor_write_here is not None @@ -542,8 +542,8 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, Path(nc_ps_file_work).unlink() if DEBUG_MODE_RUN_ONE: - print('WARNING: DEBUG_MODE_RUN_ONE is True!!!!') - print('WARNING: done processing one file!!!') + print(f'WARNING: DEBUG_MODE_RUN_ONE is True!!!!') + print(f'WARNING: done processing one file!!!') break @@ -667,7 +667,7 @@ def cmor_run_subtool( indir = None, ) if DEBUG_MODE_RUN_ONE: - print('WARNING: DEBUG_MODE_RUN_ONE is True. breaking var_list loop') + print(f'WARNING: DEBUG_MODE_RUN_ONE is True. breaking var_list loop') break From 515d56cecf929d6635daf95229baafe6b1c362a3 Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Mon, 28 Oct 2024 15:23:18 -0400 Subject: [PATCH 08/12] Revert "alrightly, lets move some of this hardcoding to the top of the file, see how it goes..." This reverts commit 4b313384a8c0cda3bb4eab2d398d99d3ada8a93c. --- fre/cmor/cmor_mixer.py | 206 ++++++++++++++++++++++++++--------------- 1 file changed, 129 insertions(+), 77 deletions(-) diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index 6650238b..65a71469 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -14,6 +14,50 @@ import click import cmor +# ------ \start assumptions / potential future configuration thingies. +# GLOBAL hardcoded assumption: netcdf files always have an ending of '.nc' +# many implicit assumptions regarding the presence of metadata in the input netcdf file name +# e.g. 
datetime, variable name, name_of_set are all assumed to be at particular spots +# utf8 encoding +# MINOR: opening netcdf files in append or write or read +# MINOR: key names in certain input configuration files- these are tightly controlled +# +# for check_dataset_for_ocean_grids: +# input reading/checking hardcode - dataset doesn't have a variable named 'xh' +# for get_vertical_dimension: +# input reading/checking hardcode - dataset has dimension/axis name 'Z' +# for create_tmp_dir: +# input reading/checking hardcode - check output directory for specific drives local2, work, net +# output moving/writing hardcode - tmpdir name is 'tmp' if condition met +# +# for rewrite_netcdf_file_var: +# input reading/checking hardcode - dimensions named 'lat', 'lon', 'time' +# input reading/checking hardcode - "bounds" for above, named 'lat_bnds', 'lon_bnds', 'time_bnds' +# input reading/checking hardcode - check that var_dim is 3 or 4 +# input reading/checking hardcode - check that var_dim is 3 --> simple 3 dim subcase +# input reading/checking hardcode - if var_dim is 4, vert_dim must be one of the following: +# "plev30", "plev19", "plev8","height2m", "level", "lev", "levhalf" +# input reading/checking hardcode - then subcases are relevant as follows: +# if vert_dim in ["plev30", "plev19", "plev8", "height2m"] --> SUBCASE +# elif vert_dim in ["level", "lev", "levhalf"] --> DISTINCT SUBCASE +# pressure input file is tagged with 'ps' potentially nearby +# sub_sub_case --> if vert_dim is lev_half +# input ds has zfactor values as 'ap_bnds', 'b_bnds' +# output moving/writing hardcode - output zfactors have names "ap_half", "b_half", +# output vertical level axis name "alternate_hybrid_sigma_half" +# sub_sub_case --> else +# input ds has zfactor values as 'ap' and 'b', and zfactor bnds as 'ap_bnds', 'b_bnds' +# output moving/writing hardcode - output zfactors have names "ap", "b", +# output vertical level axis name "alternate_hybrid_sigma" +# output moving/writing hardcode - output 
interpolated pressures have name "ps", units "Pa" +# output moving/writing hardcode - cmor setup parameters +# output moving/writing hardcode - lat/lon axies named "latitude"/"longitude" with units "degrees_N" "degrees_E" +# +# for cmorize_target_var_files: +# input reading/checking hardcode - pressure input file is tagged with 'ps' potentially nearby +# output moving/writing hardcode - pressure out file is named with 'ps' +# + # ----- \start consts @@ -21,10 +65,15 @@ # INPUT_READ_PS_FILE_VAR_NAME = 'ps' +INPUT_READDIR_NAME_CHECKS = [ [ 'contains', '/work'], + ['contains', '/net'], + ['equal', '/local2'] ] + +# OUTPUT_TEMP_DIR_NAME = 'tmp' # -INPUT_READ_TAG_OCEAN_GRID_VAR = 'xh' +INPUT_READ_OCEAN_GRID_VAR_NAME = 'xh' INPUT_READ_Z_AXIS_NAME = 'Z' # @@ -36,26 +85,36 @@ INPUT_READ_TIME_BNDS = 'time_bnds' # -INPUT_READ_AP_ZFACTS = ['ap','ap_bnds'] -INPUT_READ_B_ZFACTS = ['b','b_bnds'] +INPUT_ACCEPT_VAR_DIMS = [3,4] +INPUT_ACCEPT_VERT_DIMS = ["plev30", "plev19", "plev8","height2m", "level", "lev", "levhalf"] + +# ---- +#INPUT_SUBCASE1_VAR_VERT_DIMS = { "4": [ "plev30", "plev19", "plev8", "height2m" ] } + +# ---- +INPUT_SUBCASE2_VAR_VERT_DIMS = { "4": [ "level", "lev", "levhalf" ] } +#- +OUTPUT_SUBCASE2_PS_VAR_NAME = 'ps' +OUTPUT_SUBCASE2_PS_VAR_UNIT = 'Pa' + +# --- --- +INPUT_SUBCASE2_0_VAR_VERT_DIMS = { "4": [ "levhalf" ] } +INPUT_SUBCASE2_0_ZFACT_VALS = ['ap_bnds','b_bnds'] +#- +OUTPUT_SUBCASE2_0_ZFACT_VAL_NAMES = ['ap_half','b_half'] +OUTPUT_SUBCASE2_0_VERT_LVL_NAME = 'altername_hybrid_sigma_half' + +# --- --- +INPUT_SUBCASE2_1_VAR_VERT_DIMS = { "4": [ "level", "lev" ] } +INPUT_SUBCASE2_1_ZFACT_VALS = ['ap','b'] +INPUT_SUBCASE2_1_ZFACT_BNDS = ['ap_bnds','b_bnds'] +#- +OUTPUT_SUBCASE2_0_ZFACT_VAL_NAMES = ['ap','b'] +OUTPUT_SUBCASE2_0_VERT_LVL_NAME = 'altername_hybrid_sigma' -# -OUTPUT_WRITE_AXIS_LEVELS_HALF_NAME="alternate_hybrid_sigma" -OUTPUT_WRITE_AP_ZFACTS_NAME = "ap" -OUTPUT_WRITE_B_ZFACTS_NAME = "b" -# 
-OUTPUT_WRITE_AXIS_LEVELS_HALF_NAME="alternate_hybrid_sigma_half" -OUTPUT_WRITE_AP_ZFACTS_HALF_NAME = "ap_half" -OUTPUT_WRITE_B_ZFACTS_HALF_NAME = "b_half" -# -INPUT_ACCEPT_VAR_DIMS = [3,4] -INPUT_ACCEPT_VERT_DIMS = ['plev30', 'plev19', 'plev8','height2m', 'level', 'lev', 'levhalf'] -# -OUTPUT_PS_VAR_NAME = 'ps' -OUTPUT_PS_VAR_UNIT = 'Pa' # ----- \end consts ### ------ helper functions ------ ### @@ -76,7 +135,7 @@ def copy_nc(in_nc, out_nc): # note- totally infuriating... # the correct value for the format arg is netCDF4.Dataset.data_model # and NOT netCDF4.Dataset.disk_format - dsout = nc.Dataset(out_nc, 'w', + dsout = nc.Dataset(out_nc, "w", format = dsin.data_model) #Copy dimensions @@ -131,10 +190,10 @@ def get_iso_datetimes(var_filenames, iso_datetime_arr = None): if iso_datetime_arr is None: iso_datetime_arr = [] for filename in var_filenames: - iso_datetime = filename.split('.')[1] + iso_datetime = filename.split(".")[1] if iso_datetime not in iso_datetime_arr: iso_datetime_arr.append( - filename.split('.')[1] ) + filename.split(".")[1] ) iso_datetime_arr.sort() #print(f"(get_iso_datetimes) Available dates: {iso_datetime_arr}") if len(iso_datetime_arr) < 1: @@ -146,10 +205,9 @@ def check_dataset_for_ocean_grid(ds): one argument. this function has no return. ds: netCDF4.Dataset object containing variables with associated dimensional information. ''' - if INPUT_READ_TAG_OCEAN_GRID_VAR in list(ds.variables.keys()): + if "xh" in list(ds.variables.keys()): raise NotImplementedError( - "(check_dataset_for_ocean_grid) {INPUT_READ_TAG_OCEAN_GRID_VAR} " - "found in var_list. ocean grid req'd but not yet unimplemented. stop.") + "(check_dataset_for_ocean_grid) 'xh' found in var_list. ocean grid req'd but not yet unimplemented. stop.") def get_vertical_dimension(ds,target_var): @@ -168,7 +226,7 @@ def get_vertical_dimension(ds,target_var): dims = variable.dimensions for dim in dims: # if it is not a vertical axis, move on. 
- if not (ds[dim].axis and ds[dim].axis == INPUT_READ_Z_AXIS_NAME): + if not (ds[dim].axis and ds[dim].axis == "Z"): continue vert_dim = dim return vert_dim @@ -183,14 +241,14 @@ def create_tmp_dir(outdir): ''' print(f"(create_tmp_dir) outdir = {outdir}") tmp_dir = None - if any( [ outdir == '/local2', - outdir.find('/work') != -1, - outdir.find('/net' ) != -1 ] ): + if any( [ outdir == "/local2", + outdir.find("/work") != -1, + outdir.find("/net" ) != -1 ] ): print(f'(create_tmp_dir) using /local /work /net ( tmp_dir = {outdir}/ )') - tmp_dir = str( Path(f"{outdir}/").resolve() ) + '/' + tmp_dir = str( Path("{outdir}/").resolve() ) else: - print(f'(create_tmp_dir) NOT using /local /work /net (tmp_dir = {outdir}/{OUTPUT_TEMP_DIR_NAME}/ )') - tmp_dir = str( Path(f"{outdir}/{OUTPUT_TMP_DIR_NAME}/").resolve() ) + '/' + print(f'(create_tmp_dir) NOT using /local /work /net (tmp_dir = {outdir}/tmp/ )') + tmp_dir = str( Path(f"{outdir}/tmp/").resolve() ) try: os.makedirs(tmp_dir, exist_ok=True) except Exception as exc: @@ -206,22 +264,12 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, target_var = None, json_exp_config = None, json_table_config = None):#, tmp_dir = None ): - ''' - rewrite the input netcdf file nc_fl containing target_var in a CMIP-compliant manner. - accepts six arguments, all required: - proj_table_vars: json dictionary object, variable table read from json_table_config. - local_var: string, variable name used for finding files locally containing target_var, - this argument is often equal to target_var. - netcdf_file: string, representing path to intput netcdf file. - target_var: string, representing the variable name attached to the data object in the netcdf file. 
- json_exp_config: string, representing path to json configuration file holding metadata for appending to output - this argument is most used for making sure the right grid label is getting attached to the right output - json_table_config: string, representing path to json configuration file holding variable names for a given table. - proj_table_vars is read from this file, but both are passed anyways. + ''' rewrite the input netcdf file nc_fl containing target_var in a CMIP-compliant manner. ''' - print( "(rewrite_netcdf_file_var) input data: \n" - f" local_var = {local_var}\n" - f" target_var = {target_var}") + print('\n\n-------------------------- START rewrite_netcdf_file_var call -----') + print( "(rewrite_netcdf_file_var) input data: " ) + print(f" local_var = {local_var}" ) + print(f" target_var = {target_var}") # open the input file @@ -237,20 +285,23 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, # figure out the dimension names programmatically TODO # Define lat and lon dimensions # Assume input file is lat/lon grid - lat = ds[INPUT_READ_LAT_DIM ][:] - lon = ds[INPUT_READ_LON_DIM ][:] - lat_bnds = ds[INPUT_READ_LAT_BNDS][:] - lon_bnds = ds[INPUT_READ_LON_BNDS][:] + lat = ds["lat"][:] + lon = ds["lon"][:] + lat_bnds = ds["lat_bnds"][:] + lon_bnds = ds["lon_bnds"][:] + + ## Define time + #time = ds["time"][:] # read in time_coords + units - time_coords = ds[INPUT_READ_TIME_DIM][:] - time_coord_units = ds[INPUT_READ_TIME_DIM].units + time_coords = ds["time"][:] + time_coord_units = ds["time"].units print(f"(rewrite_netcdf_file_var) time_coord_units = {time_coord_units}") # read in time_bnds , if present time_bnds = [] try: - time_bnds = ds[INPUT_READ_TIME_BNDS][:] + time_bnds = ds["time_bnds"][:] #print(f"(rewrite_netcdf_file_var) time_bnds = {time_bnds}") except ValueError: print( "(rewrite_netcdf_file_var) WARNING grabbing time_bnds didnt work... 
moving on") @@ -268,14 +319,15 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print(f"(rewrite_netcdf_file_var) var_dim = {var_dim}, local_var = {local_var}") # Check var_dim - if var_dim not in INPUT_ACCEPT_VAR_DIMS: - raise ValueError(f"var_dim is not in {INPUT_ACCEPT_VAR_DIMS}...\n stop.\n") + if var_dim not in [3, 4]: + raise ValueError(f"var_dim == {var_dim} != 3 nor 4. stop.") # Check var_dim and vert_dim and assign lev if relevant. # error if vert_dim wrong given var_dim lev = None if var_dim == 4: - if vert_dim not in INPUT_ACCEPT_VERT_DIMS: + if vert_dim not in [ "plev30", "plev19", "plev8", + "height2m", "level", "lev", "levhalf"] : raise ValueError(f'var_dim={var_dim}, vert_dim = {vert_dim} is not supported') lev = ds[vert_dim] @@ -334,40 +386,40 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, elif vert_dim in ["level", "lev", "levhalf"]: # find the ps file nearby - ps_file = netcdf_file.replace(f'.{target_var}.nc', f'.{INPUT_READ_PS_FILE_VAR_NAME}.nc') + ps_file = netcdf_file.replace(f'.{target_var}.nc', '.ps.nc') ds_ps = nc.Dataset(ps_file) - ps = ds_ps[INPUT_READ_PS_FILE_VAR_NAME][:].copy() + ps = ds_ps['ps'][:].copy() ds_ps.close() # assign lev_half specifics if vert_dim == "lev_half": ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = OUTPUT_WRITE_AP_ZFACTS_HALF_NAME, + zfactor_name = "ap_half", axis_ids = [cmor_lev, ], - zfactor_values = ds[ INPUT_READ_AP_ZFACTS[1] ][:], - units = ds[ INPUT_READ_AP_ZFACTS[1] ].units ) + zfactor_values = ds["ap_bnds"][:], + units = ds["ap_bnds"].units ) ierr_b = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = OUTPUT_WRITE_B_ZFACTS_HALF_NAME, + zfactor_name = "b_half", axis_ids = [cmor_lev, ], - zfactor_values = ds[ INPUT_READ_B_ZFACTS[1] ][:], - units = ds[ INPUT_READ_B_ZFACTS[1] ].units ) - cmor_lev = cmor.axis( OUTPUT_WRITE_AXIS_LEVELS_HALF_NAME, + zfactor_values = ds["b_bnds"][:], + units = ds["b_bnds"].units ) + cmor_lev = cmor.axis( "alternate_hybrid_sigma_half", coord_vals = 
lev[:], units = lev.units ) else: ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = OUTPUT_WRITE_AP_ZFACTS_NAME, + zfactor_name = "ap", axis_ids = [cmor_lev, ], - zfactor_values = ds[ INPUT_READ_AP_ZFACTS[0] ][:], - zfactor_bounds = ds[ INPUT_READ_AP_ZFACTS[1] ][:], - units = ds[ INPUT_READ_AP_ZFACTS[0] ].units ) + zfactor_values = ds["ap"][:], + zfactor_bounds = ds["ap_bnds"][:], + units = ds["ap"].units ) ierr_b = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = OUTPUT_WRITE_ZFACTS_NAME, + zfactor_name = "b", axis_ids = [cmor_lev, ], - zfactor_values = ds[ INPUT_READ_b_ZFACTS[0] ][:], - zfactor_bounds = ds[ INPUT_READ_b_ZFACTS[1] ][:], - units = ds[ INPUT_READ_b_ZFACTS[0] ].units ) - cmor_lev = cmor.axis( OUTPUT_WRITE_AXIS_LEVELS_NAME, + zfactor_values = ds["b"][:], + zfactor_bounds = ds["b_bnds"][:], + units = ds["b"].units ) + cmor_lev = cmor.axis( "alternate_hybrid_sigma", coord_vals = lev[:], units = lev.units, cell_bounds = ds[vert_dim+"_bnds"] ) @@ -375,9 +427,9 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print(f'(rewrite_netcdf_file_var) ierr_ap after calling cmor_zfactor: {ierr_ap}\n' f'(rewrite_netcdf_file_var) ierr_b after calling cmor_zfactor: {ierr_b}' ) ips = cmor.zfactor( zaxis_id = cmor_lev, - zfactor_name = OUTPUT_PS_VAR_NAME, + zfactor_name = "ps", axis_ids = [cmor_time, cmor_lat, cmor_lon], - units = OUTPUT_PS_VAR_UNIT ) + units = "Pa" ) save_ps = True # assign axes at end of 4-dim case axes = [cmor_time, cmor_lev, cmor_lat, cmor_lon] @@ -464,8 +516,8 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, copy_nc( nc_fls[i], nc_file_work) # if the ps file exists, we'll copy it to the work directory too - nc_ps_file = nc_fls[i].replace(f'.{local_var}.nc', f'.{INPUT_READ_PS_FILE_VAR_NAME}.nc') - nc_ps_file_work = nc_file_work.replace(f'.{local_var}.nc', f'.{INPUT_READ_PS_FILE_VAR_NAME}.nc') + nc_ps_file = nc_fls[i].replace(f'.{local_var}.nc', '.ps.nc') + nc_ps_file_work = 
nc_file_work.replace(f'.{local_var}.nc', '.ps.nc') if Path(nc_ps_file).exists(): print(f"(cmorize_target_var_files) nc_ps_file_work = {nc_ps_file_work}") copy_nc(nc_ps_file, nc_ps_file_work) From 434dc3e9aeda5d1bfa334df538ecd9f43a7c6839 Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Mon, 28 Oct 2024 17:49:55 -0400 Subject: [PATCH 09/12] bug fix for a case or two, tidy up the presentation of the test cases for now --- fre/cmor/cmor_mixer.py | 133 ++++------------------ fre/tests/test_fre_app_cli.py | 1 + run_test_file_cases.py | 207 ++++++++++++++++++++++------------ 3 files changed, 158 insertions(+), 183 deletions(-) diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index 65a71469..0f4ef243 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -14,107 +14,9 @@ import click import cmor -# ------ \start assumptions / potential future configuration thingies. -# GLOBAL hardcoded assumption: netcdf files always have an ending of '.nc' -# many implicit assumptions regarding the presence of metadata in the input netcdf file name -# e.g. 
datetime, variable name, name_of_set are all assumed to be at particular spots -# utf8 encoding -# MINOR: opening netcdf files in append or write or read -# MINOR: key names in certain input configuration files- these are tightly controlled -# -# for check_dataset_for_ocean_grids: -# input reading/checking hardcode - dataset doesn't have a variable named 'xh' -# for get_vertical_dimension: -# input reading/checking hardcode - dataset has dimension/axis name 'Z' -# for create_tmp_dir: -# input reading/checking hardcode - check output directory for specific drives local2, work, net -# output moving/writing hardcode - tmpdir name is 'tmp' if condition met -# -# for rewrite_netcdf_file_var: -# input reading/checking hardcode - dimensions named 'lat', 'lon', 'time' -# input reading/checking hardcode - "bounds" for above, named 'lat_bnds', 'lon_bnds', 'time_bnds' -# input reading/checking hardcode - check that var_dim is 3 or 4 -# input reading/checking hardcode - check that var_dim is 3 --> simple 3 dim subcase -# input reading/checking hardcode - if var_dim is 4, vert_dim must be one of the following: -# "plev30", "plev19", "plev8","height2m", "level", "lev", "levhalf" -# input reading/checking hardcode - then subcases are relevant as follows: -# if vert_dim in ["plev30", "plev19", "plev8", "height2m"] --> SUBCASE -# elif vert_dim in ["level", "lev", "levhalf"] --> DISTINCT SUBCASE -# pressure input file is tagged with 'ps' potentially nearby -# sub_sub_case --> if vert_dim is lev_half -# input ds has zfactor values as 'ap_bnds', 'b_bnds' -# output moving/writing hardcode - output zfactors have names "ap_half", "b_half", -# output vertical level axis name "alternate_hybrid_sigma_half" -# sub_sub_case --> else -# input ds has zfactor values as 'ap' and 'b', and zfactor bnds as 'ap_bnds', 'b_bnds' -# output moving/writing hardcode - output zfactors have names "ap", "b", -# output vertical level axis name "alternate_hybrid_sigma" -# output moving/writing hardcode - output 
interpolated pressures have name "ps", units "Pa" -# output moving/writing hardcode - cmor setup parameters -# output moving/writing hardcode - lat/lon axies named "latitude"/"longitude" with units "degrees_N" "degrees_E" -# -# for cmorize_target_var_files: -# input reading/checking hardcode - pressure input file is tagged with 'ps' potentially nearby -# output moving/writing hardcode - pressure out file is named with 'ps' -# - - - # ----- \start consts DEBUG_MODE_RUN_ONE = True -# -INPUT_READ_PS_FILE_VAR_NAME = 'ps' -INPUT_READDIR_NAME_CHECKS = [ [ 'contains', '/work'], - ['contains', '/net'], - ['equal', '/local2'] ] - -# -OUTPUT_TEMP_DIR_NAME = 'tmp' - -# -INPUT_READ_OCEAN_GRID_VAR_NAME = 'xh' -INPUT_READ_Z_AXIS_NAME = 'Z' - -# -INPUT_READ_LAT_DIM = 'lat' -INPUT_READ_LAT_BNDS = 'lat_bnds' -INPUT_READ_LON_DIM = 'lon' -INPUT_READ_LON_BNDS = 'lon_bnds' -INPUT_READ_TIME_DIM = 'time' -INPUT_READ_TIME_BNDS = 'time_bnds' - -# -INPUT_ACCEPT_VAR_DIMS = [3,4] -INPUT_ACCEPT_VERT_DIMS = ["plev30", "plev19", "plev8","height2m", "level", "lev", "levhalf"] - -# ---- -#INPUT_SUBCASE1_VAR_VERT_DIMS = { "4": [ "plev30", "plev19", "plev8", "height2m" ] } - -# ---- -INPUT_SUBCASE2_VAR_VERT_DIMS = { "4": [ "level", "lev", "levhalf" ] } -#- -OUTPUT_SUBCASE2_PS_VAR_NAME = 'ps' -OUTPUT_SUBCASE2_PS_VAR_UNIT = 'Pa' - -# --- --- -INPUT_SUBCASE2_0_VAR_VERT_DIMS = { "4": [ "levhalf" ] } -INPUT_SUBCASE2_0_ZFACT_VALS = ['ap_bnds','b_bnds'] -#- -OUTPUT_SUBCASE2_0_ZFACT_VAL_NAMES = ['ap_half','b_half'] -OUTPUT_SUBCASE2_0_VERT_LVL_NAME = 'altername_hybrid_sigma_half' - -# --- --- -INPUT_SUBCASE2_1_VAR_VERT_DIMS = { "4": [ "level", "lev" ] } -INPUT_SUBCASE2_1_ZFACT_VALS = ['ap','b'] -INPUT_SUBCASE2_1_ZFACT_BNDS = ['ap_bnds','b_bnds'] -#- -OUTPUT_SUBCASE2_0_ZFACT_VAL_NAMES = ['ap','b'] -OUTPUT_SUBCASE2_0_VERT_LVL_NAME = 'altername_hybrid_sigma' - - - - # ----- \end consts ### ------ helper functions ------ ### @@ -226,6 +128,9 @@ def get_vertical_dimension(ds,target_var): dims = 
variable.dimensions for dim in dims: # if it is not a vertical axis, move on. + print(f'(get_vertical_dimension) dim={dim}') + if dim == 'landuse': + continue if not (ds[dim].axis and ds[dim].axis == "Z"): continue vert_dim = dim @@ -264,7 +169,18 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, target_var = None, json_exp_config = None, json_table_config = None):#, tmp_dir = None ): - ''' rewrite the input netcdf file nc_fl containing target_var in a CMIP-compliant manner. + ''' + rewrite the input netcdf file nc_fl containing target_var in a CMIP-compliant manner. + accepts six arguments, all required: + proj_table_vars: json dictionary object, variable table read from json_table_config. + local_var: string, variable name used for finding files locally containing target_var, + this argument is often equal to target_var. + netcdf_file: string, representing path to intput netcdf file. + target_var: string, representing the variable name attached to the data object in the netcdf file. + json_exp_config: string, representing path to json configuration file holding metadata for appending to output + this argument is most used for making sure the right grid label is getting attached to the right output + json_table_config: string, representing path to json configuration file holding variable names for a given table. + proj_table_vars is read from this file, but both are passed anyways. 
''' print('\n\n-------------------------- START rewrite_netcdf_file_var call -----') print( "(rewrite_netcdf_file_var) input data: " ) @@ -392,7 +308,10 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, ds_ps.close() # assign lev_half specifics - if vert_dim == "lev_half": + if vert_dim == "levhalf": + cmor_lev = cmor.axis( "alternate_hybrid_sigma_half", + coord_vals = lev[:], + units = lev.units ) ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, zfactor_name = "ap_half", axis_ids = [cmor_lev, ], @@ -403,10 +322,11 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, axis_ids = [cmor_lev, ], zfactor_values = ds["b_bnds"][:], units = ds["b_bnds"].units ) - cmor_lev = cmor.axis( "alternate_hybrid_sigma_half", - coord_vals = lev[:], - units = lev.units ) else: + cmor_lev = cmor.axis( "alternate_hybrid_sigma", + coord_vals = lev[:], + units = lev.units, + cell_bounds = ds[vert_dim+"_bnds"] ) ierr_ap = cmor.zfactor( zaxis_id = cmor_lev, zfactor_name = "ap", axis_ids = [cmor_lev, ], @@ -419,10 +339,6 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, zfactor_values = ds["b"][:], zfactor_bounds = ds["b_bnds"][:], units = ds["b"].units ) - cmor_lev = cmor.axis( "alternate_hybrid_sigma", - coord_vals = lev[:], - units = lev.units, - cell_bounds = ds[vert_dim+"_bnds"] ) print(f'(rewrite_netcdf_file_var) ierr_ap after calling cmor_zfactor: {ierr_ap}\n' f'(rewrite_netcdf_file_var) ierr_b after calling cmor_zfactor: {ierr_b}' ) @@ -493,7 +409,7 @@ def cmorize_target_var_files( indir = None, target_var = None, local_var = None, #determine a tmp dir for working on files. - tmp_dir = create_tmp_dir( outdir ) + tmp_dir = create_tmp_dir( outdir ) + '/' print(f'(cmorize_target_var_files) will use tmp_dir={tmp_dir}') @@ -721,6 +637,7 @@ def cmor_run_subtool( indir = None, if DEBUG_MODE_RUN_ONE: print(f'WARNING: DEBUG_MODE_RUN_ONE is True. 
breaking var_list loop') break + return 0 @click.command() diff --git a/fre/tests/test_fre_app_cli.py b/fre/tests/test_fre_app_cli.py index 4b61ff75..ca14df51 100644 --- a/fre/tests/test_fre_app_cli.py +++ b/fre/tests/test_fre_app_cli.py @@ -1,6 +1,7 @@ """ test "fre app" calls """ import os +import subprocess from pathlib import Path import click diff --git a/run_test_file_cases.py b/run_test_file_cases.py index 04234bc7..e22c8fa1 100644 --- a/run_test_file_cases.py +++ b/run_test_file_cases.py @@ -13,6 +13,16 @@ import fre from fre.cmor.cmor_mixer import cmor_run_subtool as run_cmor +def print_the_outcome(some_return,case_str): + print('-----------------------------------------------------------------------------------------------------------------') + if some_return != 0: + print(f'{case_str} case failed[[[FAIL -_-]]]: some_return={some_return}') + else: + print(f'{case_str} case probably OK [[[PROB-OK ^-^]]]: some_return={some_return}') + print('-----------------------------------------------------------------------------------------------------------------') + print(f'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n') + assert some_return == 0 + # global consts for these tests, with no/trivial impact on the results ROOTDIR='fre/tests/test_files' CMORBITE_VARLIST=f'{ROOTDIR}/CMORbite_var_list.json' @@ -23,16 +33,26 @@ def run_cmor_RUN(filename, table, opt_var_name): - func_debug = True - if func_debug: - print('run_cmor(' - f' indir = {str(Path(filename).parent)},' - f' json_var_list = {CMORBITE_VARLIST},' - f' json_table_config = {ROOTDIR}/cmip6-cmor-tables/Tables/CMIP6_{table}.json,' - f' json_exp_config = {EXP_CONFIG_DEFAULT},' - f' outdir = {os.getcwd()},' - f' opt_var_name = opt_var_name' - ')' + func_debug1 = False + if func_debug1: + print('run_cmor(\n' + f' indir = \"{str(Path(filename).parent)}\",\n' + f' json_var_list = \"{CMORBITE_VARLIST}\",\n' + f' json_table_config = \"{ROOTDIR}/cmip6-cmor-tables/Tables/CMIP6_{table}.json\",\n' + f' json_exp_config = 
\"{EXP_CONFIG_DEFAULT}\",\n' + f' outdir = \"{os.getcwd()}\",\n' + f' opt_var_name = \"{opt_var_name}\"\n' + ')\n' + ) + func_debug2 = True + if func_debug2: + print('fre cmor run ' + f'-d {str(Path(filename).parent)} ' + f'-l {CMORBITE_VARLIST} ' + f'-r {ROOTDIR}/cmip6-cmor-tables/Tables/CMIP6_{table}.json ' + f'-p {EXP_CONFIG_DEFAULT} ' + f'-o {os.getcwd()} ' + f'-v {opt_var_name} ' ) FOO_return = run_cmor( indir = str(Path(filename).parent), @@ -45,96 +65,133 @@ def run_cmor_RUN(filename, table, opt_var_name): return FOO_return -# 1) SUCCEEDs -# land, Lmon, gr1 -# Result - one file debug mode success, but the exp_config has the wrong grid, amongst other thinhgs?> -testfile_land_gr1_Lmon = \ - '/archive/Eric.Stofferahn/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/land/ts/monthly/5yr/land.005101-005512.lai.nc' -some_return = run_cmor_RUN(testfile_land_gr1_Lmon, 'Lmon', opt_var_name = 'lai') -print(f'some_return={some_return}') -sys.exit() -#assert False +## 1) SUCCEEDs +## land, Lmon, gr1 +#testfile_land_gr1_Lmon = \ +# '/archive/Eric.Stofferahn/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/land/ts/monthly/5yr/land.005101-005512.lai.nc' +#try: +# some_return = run_cmor_RUN(testfile_land_gr1_Lmon, 'Lmon', opt_var_name = 'lai') +#except: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'land_gr1_Lmon / lai') -## 2) FAIL -## native vertical atmos, (Amon, AERmon: gr1), just like above, but with nontrivial vertical levels? -## this one is more typical, on the FULL ATMOS LEVELS -## Amon / cl -## Result - error, UnboundLocalError: local variable 'cmor_lev' referenced before assignment (ps file handing double check!!!) -## WITH BUG: problematic file path in copy nc... 
/home/Ian.Laflotte/Working/fre-cli/tmpocean_monthly_1x1deg.185001-185412.sos.n, +## 2) SUCCEEDs +## atmos, Amon / cl #testfile_atmos_level_cmip_gr1_Amon_complex_vert = \ # '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_level_cmip/ts/monthly/5yr/atmos_level_cmip.196001-196412.cl.nc' -#run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_complex_vert, 'Amon', opt_var_name = 'cl') -#assert False - -## 3) FAIL -## this one is on the ATMOS HALF-LEVELS -## Amon / mc -## Result - error, UnboundLocalError: local variable 'cmor_lev' referenced before assignment (ps file handing double check!!!) -## WITH BUG: problematic file path in copy nc... /home/Ian.Laflotte/Working/fre-cli/tmpatmos_level_cmip.185001-185412.mc.nc +#try: +# some_return = run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_complex_vert, 'Amon', opt_var_name = 'cl') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'atmos_level_cmip_gr1_Amon_complex_vert / cl') + + +## 3) SUCCEEDs +## atmos, Amon / mc #testfile_atmos_level_cmip_gr1_Amon_fullL = \ # '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_level_cmip/ts/monthly/5yr/atmos_level_cmip.195501-195912.mc.nc' -#run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_fullL, 'Amon', opt_var_name = 'mc') -#assert False +#try: +# some_return = run_cmor_RUN(testfile_atmos_level_cmip_gr1_Amon_fullL, 'Amon', opt_var_name = 'mc') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'atmos_level_cmip_gr1_Amon_fullL / mc') -## 4) FAIL -## zonal averages. AmonZ... no AmonZ table though??? -## !!!REPLACING AmonZ w/ Amon!!! 
+ +## 4) FAIL (no longitude coordinate case) +## atmos, Amoon / ta ## just like #1, but lack longitude -## Result - error, lat/lon hardcoding as chris was saying would break: File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", line 195, in rewrite_netcdf_file_var lon = ds["lon"][:] File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lon not found in / -## WITH BUG: problematic file path in copy nc... /home/Ian.Laflotte/Working/fre-cli/tmpatmos_plev39_cmip.185001-185412.ta.nc +## Result - error, File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", line 195, in rewrite_netcdf_file_var lon = ds["lon"][:] File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lon not found in / #testfile_atmos_gr1_AmonZ_nolons = \ # '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_plev39_cmip/ts/monthly/5yr/zonavg/atmos_plev39_cmip.201001-201412.ta.nc' -#run_cmor_RUN(testfile_atmos_gr1_AmonZ_nolons, 'Amon', opt_var_name = 'ta') -#assert False +#try: +# some_return = run_cmor_RUN(testfile_atmos_gr1_AmonZ_nolons, 'Amon', opt_var_name = 'ta') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'atmos_gr1_AmonZ_nolons / ta') + -## 5) PARTIAL FAIL -## ocean regridded, gr. seaice could be slightly different (Omon?) #TODO -## Result - success WITH BUG: problematic file path in copy nc... 
/home/Ian.Laflotte/Working/fre-cli/tmpocean_monthly_1x1deg.185001-185412.sos.n, +## 5) SUCCEEDS +## ocean, Omon / sos #testfile_ocean_monthly_1x1deg_gr = \ # '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/ocean_monthly_1x1deg/ts/monthly/5yr/ocean_monthly_1x1deg.190001-190412.sos.nc' -#run_cmor_RUN(testfile_ocean_monthly_1x1deg_gr, 'Omon', opt_var_name = 'sos') -#assert False +#try: +# some_return = run_cmor_RUN(testfile_ocean_monthly_1x1deg_gr, 'Omon', opt_var_name = 'sos') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'ocean_monthly_1x1deg_gr / sos') + -## ocean native, gn. seaice could be slightly different (Omon?) #TODO -## Result - error, AttributeError: NetCDF: Attempt to define fill value when data already exists. + +## 6) FAIL (copy_nc failure!!! WEIRD) +## ocean, Omon / sos +## Result - error, AttributeError: NetCDF: Attempt to define fill value when data already exists. #testfile_ocean_monthly_gn = \ # '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly/ts/monthly/5yr/ocean_monthly.002101-002512.sos.nc' -#run_cmor_RUN(testfile_ocean_monthly_gn, 'Omon', opt_var_name = 'sos') -#assert False - -## 6) FAIL -## ocean 3D, either. seaice could be slightly different (Omon?) #TODO -## just like #4 and #5, analogous to #2 (this is kinda funny... zonal averaged, horizontally regridded but maybe not, w/ native vertical levels (half or full?)? -## this one is regridded (1x1 deg was regrid above so it's not the native resolution) -## Result - error, AttributeError: NetCDF: Attempt to define fill value when data already exists +#try: +# some_return = run_cmor_RUN(testfile_ocean_monthly_gn, 'Omon', opt_var_name = 'sos') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'ocean_monthly_gn / sos') + + + +## 7) FAIL (copy_nc failure!!! 
WEIRD) +## ocean, Omon / so +## Result - identical failure to #6 #testfile_ocean_monthly_z_1x1deg_gr = \ # '/archive/ejs/CMIP7/ESM4/DEV/ESM4.5v01_om5b04_piC/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly_z_1x1deg/ts/monthly/5yr/ocean_monthly_z_1x1deg.000101-000512.so.nc' -#run_cmor_RUN(testfile_ocean_monthly_z_1x1deg_gr, 'Omon', opt_var_name = 'so') -#assert False +#try: +# some_return = run_cmor_RUN(testfile_ocean_monthly_z_1x1deg_gr, 'Omon', opt_var_name = 'so') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'ocean_monthly_z_1x1deg_gr / so') + -## 7) FAIL -## global scalars, gn, e.g. Amon -## lack longitude and latitude +## 8) FAIL (no latitude nor longitude coordinates cases) +## atmos, Amon / ch4global ## Result - error, File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lat not found in / #testfile_atmos_scalar_gn_Amon_nolon_nolat = \ # '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_scalar/ts/monthly/5yr/atmos_scalar.197001-197412.ch4global.nc' -#run_cmor_RUN(testfile_atmos_scalar_gn_Amon_nolon_nolat, 'Amon', opt_var_name = 'ch4global') -#assert False - -# 8) FAIL -# phase 2L landuse land output, gr1, e.g. 
Emon -# “landuse” as a dimension -# Result - error, File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", line 134, in get_vertical_dimension if not (ds[dim].axis and ds[dim].axis == "Z"): -# File "src/netCDF4/_netCDF4.pyx", line 4932, in netCDF4._netCDF4.Variable.__getattr__ -# File "src/netCDF4/_netCDF4.pyx", line 4654, in netCDF4._netCDF4.Variable.getncattr -# File "src/netCDF4/_netCDF4.pyx", line 1617, in netCDF4._netCDF4._get_att -# File "src/netCDF4/_netCDF4.pyx", line 2113, in netCDF4._netCDF4._ensure_nc_success +#try: +# some_return = run_cmor_RUN(testfile_atmos_scalar_gn_Amon_nolon_nolat, 'Amon', opt_var_name = 'ch4global') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'atmos_scalar_gn_Amon_nolon_nolat / ch4global') + + +# 9) FAIL +# LUmip, Emon / gppLut +# Result - error, +# File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", +# line 134, in get_vertical_dimension if not (ds[dim].axis and ds[dim].axis == "Z"): # AttributeError: NetCDF: Attribute not found testfile_LUmip_refined_gr1_Emon_landusedim = \ '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/LUmip_refined/ts/monthly/5yr/LUmip_refined.185001-185412.gppLut.nc' -run_cmor_RUN(testfile_LUmip_refined_gr1_Emon_landusedim, 'Emon', opt_var_name = 'gppLut') -assert False +try: + some_return = run_cmor_RUN(testfile_LUmip_refined_gr1_Emon_landusedim, 'Emon', opt_var_name = 'gppLut') +except Exception as exc: + print(f'exception caught: exc=\n{exc}') + some_return=-1 + pass +print_the_outcome(some_return,'LUmip_refined_gr1_Emon_langusedim / gppLut') + From 14b08cdc962bbaaf4728d4cc00b766b70c9adbda Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Tue, 29 Oct 2024 11:10:12 -0400 Subject: [PATCH 10/12] update readme and frecmor.py with better doc strings and help descriptions at the cli. 
update test file cases for addressing no lat/lon case(s) --- fre/cmor/README.md | 45 +++++++++++++++++++------- fre/cmor/frecmor.py | 27 ++++++++++++---- run_test_file_cases.py | 73 +++++++++++++++++++++--------------------- 3 files changed, 89 insertions(+), 56 deletions(-) diff --git a/fre/cmor/README.md b/fre/cmor/README.md index 8f5179c6..865c1ee7 100644 --- a/fre/cmor/README.md +++ b/fre/cmor/README.md @@ -29,18 +29,39 @@ this subtool's help, and command-specific `run` help: # subtool command-specific help, e.g. for run -> fre cmor run --help - Usage: fre cmor run [OPTIONS] - - Rewrite climate model output - - Options: - -d, --indir TEXT Input directory [required] - -l, --varlist TEXT Variable list [required] - -r, --table_config TEXT Table configuration [required] - -p, --exp_config TEXT Experiment configuration [required] - -o, --outdir TEXT Output directory [required] - --help Show this message and exit. +> fre cmor run --help +Usage: fre cmor run [OPTIONS] + + Rewrite climate model output files with CMIP-compliant metadata for down- + stream publishing + +Options: + -d, --indir TEXT directory containing netCDF files. keys specified + in json_var_list are local variable names used for + targeting specific files in this directory + [required] + -l, --varlist TEXT path pointing to a json file containing directory + of key/value pairs. the keys are the 'local' names + used in the filename, and the values pointed to by + those keys are strings representing the name of the + variable contained in targeted files. the key and + value are often the same, but it is not required. + [required] + -r, --table_config TEXT json file containing CMIP-compliant per- + variable/metadata for specific MIP table. The MIP + table can generally be identified by the specific + filename (e.g. 'Omon') [required] + -p, --exp_config TEXT json file containing metadata dictionary for + CMORization. 
this metadata is effectively appended + to the final output file's header [required] + -o, --outdir TEXT directory root that will contain the full output + and output directory structure generated by the + cmor module upon request. [required] + -v, --opt_var_name TEXT optional, specify a variable name to specifically + process only filenames matching that variable name. + I.e., this string help target local_vars, not + target_vars. + --help Show this message and exit. ``` diff --git a/fre/cmor/frecmor.py b/fre/cmor/frecmor.py index 7815b1ac..e882186a 100644 --- a/fre/cmor/frecmor.py +++ b/fre/cmor/frecmor.py @@ -11,32 +11,45 @@ def cmor_cli(): @cmor_cli.command() @click.option("-d", "--indir", type=str, - help="Input directory", + help="directory containing netCDF files. keys specified in json_var_list are local " + \ + "variable names used for targeting specific files in this directory", required=True) @click.option("-l", "--varlist", type=str, - help="Variable list", + help="path pointing to a json file containing directory of key/value pairs. " + \ + "the keys are the \'local\' names used in the filename, and the values " + \ + "pointed to by those keys are strings representing the name of the variable " + \ + "contained in targeted files. the key and value are often the same, " + \ + "but it is not required.", required=True) @click.option("-r", "--table_config", type=str, - help="Table configuration", + help="json file containing CMIP-compliant per-variable/metadata for specific " + \ + "MIP table. The MIP table can generally be identified by the specific " + \ + "filename (e.g. \'Omon\')", required=True) @click.option("-p", "--exp_config", type=str, - help="Experiment configuration", + help="json file containing metadata dictionary for CMORization. 
this metadata is " + \ + "effectively appended to the final output file's header", required=True) @click.option("-o", "--outdir", type=str, - help="Output directory", + help="directory root that will contain the full output and output directory " + \ + "structure generated by the cmor module upon request.", required=True) @click.option('-v', "--opt_var_name", type = str, - help="optional variable name filter. if the variable name in the targeted file doesnt match, the variable is skipped", + help="optional, specify a variable name to specifically process only filenames " + \ + "matching that variable name. I.e., this string help target local_vars, not " + \ + "target_vars.", required=False) @click.pass_context def run(context, indir, varlist, table_config, exp_config, outdir, opt_var_name): # pylint: disable=unused-argument - """Rewrite climate model output""" + """ + Rewrite climate model output files with CMIP-compliant metadata for down-stream publishing + """ context.invoke( _cmor_run_subtool, indir = indir, diff --git a/run_test_file_cases.py b/run_test_file_cases.py index e22c8fa1..a588f803 100644 --- a/run_test_file_cases.py +++ b/run_test_file_cases.py @@ -104,19 +104,19 @@ def run_cmor_RUN(filename, table, opt_var_name): #print_the_outcome(some_return,'atmos_level_cmip_gr1_Amon_fullL / mc') -## 4) FAIL (no longitude coordinate case) -## atmos, Amoon / ta -## just like #1, but lack longitude -## Result - error, File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", line 195, in rewrite_netcdf_file_var lon = ds["lon"][:] File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lon not found in / -#testfile_atmos_gr1_AmonZ_nolons = \ -# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_plev39_cmip/ts/monthly/5yr/zonavg/atmos_plev39_cmip.201001-201412.ta.nc' -#try: -# some_return = run_cmor_RUN(testfile_atmos_gr1_AmonZ_nolons, 'Amon', opt_var_name = 'ta') -#except Exception as 
exc: -# print(f'exception caught: exc=\n{exc}') -# some_return=-1 -# pass -#print_the_outcome(some_return,'atmos_gr1_AmonZ_nolons / ta') +# 4) FAIL (no longitude coordinate case) +# atmos, Amoon / ta +# just like #1, but lack longitude +# Result - error, File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", line 195, in rewrite_netcdf_file_var lon = ds["lon"][:] File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lon not found in / +testfile_atmos_gr1_AmonZ_nolons = \ + '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_plev39_cmip/ts/monthly/5yr/zonavg/atmos_plev39_cmip.201001-201412.ta.nc' +try: + some_return = run_cmor_RUN(testfile_atmos_gr1_AmonZ_nolons, 'Amon', opt_var_name = 'ta') +except Exception as exc: + print(f'exception caught: exc=\n{exc}') + some_return=-1 + pass +print_the_outcome(some_return,'atmos_gr1_AmonZ_nolons / ta') ## 5) SUCCEEDS @@ -162,35 +162,34 @@ def run_cmor_RUN(filename, table, opt_var_name): #print_the_outcome(some_return,'ocean_monthly_z_1x1deg_gr / so') -## 8) FAIL (no latitude nor longitude coordinates cases) -## atmos, Amon / ch4global -## Result - error, File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lat not found in / -#testfile_atmos_scalar_gn_Amon_nolon_nolat = \ -# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_scalar/ts/monthly/5yr/atmos_scalar.197001-197412.ch4global.nc' -#try: -# some_return = run_cmor_RUN(testfile_atmos_scalar_gn_Amon_nolon_nolat, 'Amon', opt_var_name = 'ch4global') -#except Exception as exc: -# print(f'exception caught: exc=\n{exc}') -# some_return=-1 -# pass -#print_the_outcome(some_return,'atmos_scalar_gn_Amon_nolon_nolat / ch4global') - - -# 9) FAIL -# LUmip, Emon / gppLut -# Result - error, -# File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", -# line 134, in get_vertical_dimension if not (ds[dim].axis and ds[dim].axis 
== "Z"): -# AttributeError: NetCDF: Attribute not found -testfile_LUmip_refined_gr1_Emon_landusedim = \ - '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/LUmip_refined/ts/monthly/5yr/LUmip_refined.185001-185412.gppLut.nc' +# 8) FAIL (no latitude nor longitude coordinates cases) +# atmos, Amon / ch4global +# Result - error, File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lat not found in / +testfile_atmos_scalar_gn_Amon_nolon_nolat = \ + '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_scalar/ts/monthly/5yr/atmos_scalar.197001-197412.ch4global.nc' try: - some_return = run_cmor_RUN(testfile_LUmip_refined_gr1_Emon_landusedim, 'Emon', opt_var_name = 'gppLut') + some_return = run_cmor_RUN(testfile_atmos_scalar_gn_Amon_nolon_nolat, 'Amon', opt_var_name = 'ch4global') except Exception as exc: print(f'exception caught: exc=\n{exc}') some_return=-1 pass -print_the_outcome(some_return,'LUmip_refined_gr1_Emon_langusedim / gppLut') +print_the_outcome(some_return,'atmos_scalar_gn_Amon_nolon_nolat / ch4global') + + +## 9) FAIL (4 dimensional data with no vertical) +## Result - error, +## File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", +## line 134, in get_vertical_dimension if not (ds[dim].axis and ds[dim].axis == "Z"): +## AttributeError: NetCDF: Attribute not found +#testfile_LUmip_refined_gr1_Emon_landusedim = \ +# '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/LUmip_refined/ts/monthly/5yr/LUmip_refined.185001-185412.gppLut.nc' +#try: +# some_return = run_cmor_RUN(testfile_LUmip_refined_gr1_Emon_landusedim, 'Emon', opt_var_name = 'gppLut') +#except Exception as exc: +# print(f'exception caught: exc=\n{exc}') +# some_return=-1 +# pass +#print_the_outcome(some_return,'LUmip_refined_gr1_Emon_langusedim / gppLut') From d80b79078593c208e7be0262ca342141d52de502 Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Tue, 29 Oct 2024 
14:54:48 -0400 Subject: [PATCH 11/12] intermediate commit- new function to compar variable dimensions. none checks, trying to generally allow this code to be more flexible. not currently working atm --- fre/cmor/cmor_mixer.py | 195 +++++++++++++++++++++++++++++++++++------ run_test_file_cases.py | 9 +- 2 files changed, 175 insertions(+), 29 deletions(-) diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index 0f4ef243..6ba4cdf3 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -20,6 +20,61 @@ # ----- \end consts ### ------ helper functions ------ ### +def check_table_dims_v_var_dims(json_table_config = None, table_var_dims = None, var_dims = None): + ''' + checks the mip table's required dimensions against the current input netcdf file's dimensions. if a req'd + dimension isn't found in the input netcdf file, either as an exact match or a match to a standard/long/out name field, + we throw an error. currently accepts three arguments, two required and one optional + json_table_config: string, optional + table_var_dims: tuple of strings, req'd + var_dims: tuple of strings, req'd + ''' + # check if the information req'd by the table is present in the list assoc with the variable + print(f'\n\n(check_table_dims_v_var_dims) attempting to compare target_var dimension names with table variable entry dimension names...') + for proj_table_var_dim in table_var_dims: + print(f'-----------------------------------------------------------------------------------------------------------------------------------------') + print(f'(check_table_dims_v_var_dims) proj_table_var_dim = {proj_table_var_dim}') + + # check the mip coordinate file for accepted standard_name, out_name, long_name flavors + cmip_coords = None + try: + json_coordinate_config = str(Path(json_table_config).parent) + '/CMIP6_coordinate.json' + print(f'(check_table_dims_v_var_dims) json_coordinate_config = {json_coordinate_config}') + with open ( json_coordinate_config, 'r', 
encoding="utf-8") as json_cmip_coords: + cmip_coords = json.load( json_cmip_coords ) + except Exception as exc: + print(f'(check_table_dims_v_var_dims) trying to open MIP coordinate files... caught exception: exc = \n {exc}') + print(f' WARNING wont be making extra checks against accepted coordinat names...') + + dim_is_present=False + for var_dim in var_dims: + print(f'(check_table_dims_v_var_dims) var_dim = {var_dim}') + if var_dim == proj_table_var_dim: + print(f' required coordinate dimension {proj_table_var_dim} is present in the netcdf file') + dim_is_present=True + break + + if cmip_coords is not None: + print(f' did not find the exact coordinate name {proj_table_var_dim} is present in the netcdf file ') + print(f' .... checking standard/out/long names ....' ) + accepted_names = [ + cmip_coords['axis_entry'][proj_table_var_dim]['standard_name'], + cmip_coords['axis_entry'][proj_table_var_dim]['out_name'], + cmip_coords['axis_entry'][proj_table_var_dim]['long_name'] ] + print(f'(check_table_dims_v_var_dims) accepted_names = \n {accepted_names}') + if var_dim in accepted_names: + print(f' required coordinate dimension {proj_table_var_dim} is present in the netcdf file') + print(f' WARNING!!!! not an exact match for {proj_table_var_dim}.. it actually matches one of... {accepted_names}') + dim_is_present=True + break + + print(f'-----------------------------------------------------------------------------------------------------------------------------------------\n\n') + if not dim_is_present: + raise ValueError(f'(check_table_dims_v_var_dims)' + f' ERROR: dimension {proj_table_var_dim} is required for variable {target_var} / table {Path(json_table_config).name}') + print(f'(check_table_dims_v_var_dims) done comparing dimension names of between the table and the file\'s data.') + + def copy_nc(in_nc, out_nc): ''' copy target input netcdf file in_nc to target out_nc. 
I have to think this is not a trivial copy @@ -193,23 +248,74 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, ds = nc.Dataset(netcdf_file,'a') + # read what specific dimensions are req'd by the table for this variable + print(f'(rewrite_netcdf_file_var) attempting to read target_var dimension names FROM proj_table_vars...') + proj_table_var_dims = tuple( [ + var_dim for var_dim in proj_table_vars['variable_entry'][target_var]["dimensions"].split(' ') + ] ) + print(f' proj_table_var_dims = {proj_table_var_dims}') + print(f' type(proj_table_var_dims) = {type(proj_table_var_dims)}') + + + + + # read what specific dimensions are assoc with the variable (lat/lon/plev etc.) + print(f'(rewrite_netcdf_file_var) attempting to read target_var dimension names...') + var_dims = ds[target_var].dimensions + print(f' var_dims = {var_dims}') + + + + check_table_dims_v_var_dims( + json_table_config = json_table_config, + table_var_dims = ('latitude', 'time', 'plev19', 'longitude'),#proj_table_var_dims, + var_dims = var_dims + ) + # # check if the information req'd by the table is present in the list assoc with the variable + # print(f'(rewrite_netcdf_file_var) attempting to compare target_var dimension names with table variable entry dimension names...') + # for proj_table_var_dim in proj_table_var_dims: + # dim_is_present=False + # for var_dim in var_dims: + # if proj_table_var_dim == var_dim: + # print(f'required coordinate dimension {proj_table_var_dim} is present in the netcdf file') + # dim_is_present=True + # break + # + # if not dim_is_present: + # raise ValueError(f'(rewrite_netcdf_file_var) ERROR: dimension {proj_table_var_dim} is required for variable {target_var} / table {Path(json_table_config).name}') + # print(f'(rewrite_netcdf_file_var) done comparing dimension names of between the table and the file\'s data.') + + + # ocean grids are not implemented yet. 
print( '(rewrite_netcdf_file_var) checking input netcdf file for oceangrid condition') check_dataset_for_ocean_grid(ds) - # figure out the dimension names programmatically TODO - # Define lat and lon dimensions - # Assume input file is lat/lon grid - lat = ds["lat"][:] - lon = ds["lon"][:] - lat_bnds = ds["lat_bnds"][:] - lon_bnds = ds["lon_bnds"][:] - - ## Define time - #time = ds["time"][:] + # Attempt to read lat coordinates + print(f'(rewrite_netcdf_file_var) attempting to read coordinate(s), lat, lat_bnds') + lat, lat_bnds = None, None + try: + lat, lat_bnds = ds["lat"][:], ds["lat_bnds"][:] + except Exception as exc: + print(f'(rewrite_netcdf_file_var) WARNING could not read latitude coordinate. moving on.\n exc = {exc}') + print(f' lat = {lat}') + print(f' lat_bnds = {lat_bnds}') + pass + + # Attempt to read lon coordinates + print(f'(rewrite_netcdf_file_var) attempting to read coordinate(s), lon, lon_bnds') + lon, lon_bnds = None, None + try: + lon, lon_bnds = ds["lon"][:], ds["lon_bnds"][:] + except Exception as exc: + print(f'(rewrite_netcdf_file_var) WARNING could not read longitude coordinate. moving on.\n exc = {exc}') + print(f' lon = {lon}') + print(f' lon_bnds = {lon_bnds}') + pass # read in time_coords + units + #time = ds["time"][:] time_coords = ds["time"][:] time_coord_units = ds["time"].units print(f"(rewrite_netcdf_file_var) time_coord_units = {time_coord_units}") @@ -230,21 +336,27 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, vert_dim = get_vertical_dimension(ds, target_var) print(f"(rewrite_netcdf_file_var) Vertical dimension of {target_var}: {vert_dim}") - # grab var_dim - var_dim = len(var.shape) - print(f"(rewrite_netcdf_file_var) var_dim = {var_dim}, local_var = {local_var}") - # Check var_dim - if var_dim not in [3, 4]: - raise ValueError(f"var_dim == {var_dim} != 3 nor 4. 
stop.") + # grab var_N_dims and check it's values + var_N_dims = len(var.shape) + print(f"(rewrite_netcdf_file_var) var_N_dims = {var_N_dims}, local_var = {local_var}") + if var_N_dims not in [3, 4]: + raise ValueError(f"var_N_dims == {var_N_dims} != 3 nor 4. stop.") + + + print(f'(rewrite_netcdf_file_var) ASSERTING FALSE NOW') + raise Exception() - # Check var_dim and vert_dim and assign lev if relevant. - # error if vert_dim wrong given var_dim + + + + # Check var_N_dims and vert_dim and assign lev if relevant. + # error if vert_dim wrong given var_N_dims lev = None - if var_dim == 4: + if var_N_dims == 4: if vert_dim not in [ "plev30", "plev19", "plev8", "height2m", "level", "lev", "levhalf"] : - raise ValueError(f'var_dim={var_dim}, vert_dim = {vert_dim} is not supported') + raise ValueError(f'var_N_dims={var_N_dims}, vert_dim = {vert_dim} is not supported') lev = ds[vert_dim] @@ -266,11 +378,27 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print(f"(rewrite_netcdf_file_var) cmor is opening json_table_config = {json_table_config}") cmor.load_table(json_table_config) + # read units units = proj_table_vars["variable_entry"] [target_var] ["units"] print(f"(rewrite_netcdf_file_var) units={units}") - cmor_lat = cmor.axis("latitude", coord_vals = lat, cell_bounds = lat_bnds, units = "degrees_N") - cmor_lon = cmor.axis("longitude", coord_vals = lon, cell_bounds = lon_bnds, units = "degrees_E") + # setup cmor latitude axis if relevant + print(f'(rewrite_netcdf_file_var) assigning cmor_lat') + cmor_lat = None + if any( [ lat is None, lat_bnds is None ] ): + print(f'(rewrite_netcdf_file_var) WARNING: lat or lat_bnds is None, skipping assigning cmor_lat') + else: + cmor_lat = cmor.axis("latitude", coord_vals = lat, cell_bounds = lat_bnds, units = "degrees_N") + + # setup cmor longitude axis if relevant + print(f'(rewrite_netcdf_file_var) assigning cmor_lon') + cmor_lon = None + if any( [ lon is None, lon_bnds is None ] ): + print(f'(rewrite_netcdf_file_var) 
WARNING: lon or lon_bnds is None, skipping assigning cmor_lon') + else: + cmor_lon = cmor.axis("longitude", coord_vals = lon, cell_bounds = lon_bnds, units = "degrees_E") + + # setup cmor_time axis if relevant try: print( f"(rewrite_netcdf_file_var) Executing cmor.axis('time', \n" f" coord_vals = \n{time_coords}, \n" @@ -290,11 +418,27 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, ips = None # set axes for 3-dim case - if var_dim == 3: - axes = [cmor_time, cmor_lat, cmor_lon] + if var_N_dims == 3: + axes = [] + if cmor_time is not None: + axes.append(cmor_time) + else: + print(f'(rewrite_netcdf_file_var) WARNING: cmor_time is None!!! moving on...') + if cmor_lat is not None: + axes.append(cmor_lat) + else: + print(f'(rewrite_netcdf_file_var) WARNING: cmor_lat is None!!! moving on...') + if cmor_lon is not None: + axes.append(cmor_lon) + else: + print(f'(rewrite_netcdf_file_var) WARNING: cmor_lon is None!!! moving on...') print(f"(rewrite_netcdf_file_var) axes = {axes}") + + #axes2 = [cmor_time, cmor_lat, cmor_lon] + #print(f"(rewrite_netcdf_file_var) axes2 = {axes2}") + # set axes for 4-dim case - elif var_dim == 4: + elif var_N_dims == 4: if vert_dim in ["plev30", "plev19", "plev8", "height2m"]: cmor_lev = cmor.axis( vert_dim, @@ -358,6 +502,7 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print(f"(rewrite_netcdf_file_var) positive = {positive}") cmor_var = cmor.variable(target_var, units, axes, positive = positive) + # Write the output to disk #var = ds[target_var][:] #was this ever needed? why? 
cmor.write(cmor_var, var) diff --git a/run_test_file_cases.py b/run_test_file_cases.py index a588f803..1518c266 100644 --- a/run_test_file_cases.py +++ b/run_test_file_cases.py @@ -105,19 +105,20 @@ def run_cmor_RUN(filename, table, opt_var_name): # 4) FAIL (no longitude coordinate case) -# atmos, Amoon / ta +# atmos, AERmonZ / ta # just like #1, but lack longitude # Result - error, File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", line 195, in rewrite_netcdf_file_var lon = ds["lon"][:] File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lon not found in / -testfile_atmos_gr1_AmonZ_nolons = \ +testfile_atmos_gr1_AERmonZ_nolons = \ '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_plev39_cmip/ts/monthly/5yr/zonavg/atmos_plev39_cmip.201001-201412.ta.nc' try: - some_return = run_cmor_RUN(testfile_atmos_gr1_AmonZ_nolons, 'Amon', opt_var_name = 'ta') + some_return = run_cmor_RUN(testfile_atmos_gr1_AERmonZ_nolons, 'AERmonZ', opt_var_name = 'ta') except Exception as exc: print(f'exception caught: exc=\n{exc}') some_return=-1 pass -print_the_outcome(some_return,'atmos_gr1_AmonZ_nolons / ta') +print_the_outcome(some_return,'atmos_gr1_AERmonZ_nolons / ta') +sys.exit() ## 5) SUCCEEDS ## ocean, Omon / sos From 1a15d9534c767ba85061d529089fdb7e06832469 Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Thu, 31 Oct 2024 15:15:34 -0400 Subject: [PATCH 12/12] Revert "intermediate commit- new function to compar variable dimensions. none checks, trying to generally allow this code to be more flexible. not currently working atm" This reverts commit d80b79078593c208e7be0262ca342141d52de502. 
--- fre/cmor/cmor_mixer.py | 195 ++++++----------------------------------- run_test_file_cases.py | 9 +- 2 files changed, 29 insertions(+), 175 deletions(-) diff --git a/fre/cmor/cmor_mixer.py b/fre/cmor/cmor_mixer.py index 6ba4cdf3..0f4ef243 100755 --- a/fre/cmor/cmor_mixer.py +++ b/fre/cmor/cmor_mixer.py @@ -20,61 +20,6 @@ # ----- \end consts ### ------ helper functions ------ ### -def check_table_dims_v_var_dims(json_table_config = None, table_var_dims = None, var_dims = None): - ''' - checks the mip table's required dimensions against the current input netcdf file's dimensions. if a req'd - dimension isn't found in the input netcdf file, either as an exact match or a match to a standard/long/out name field, - we throw an error. currently accepts three arguments, two required and one optional - json_table_config: string, optional - table_var_dims: tuple of strings, req'd - var_dims: tuple of strings, req'd - ''' - # check if the information req'd by the table is present in the list assoc with the variable - print(f'\n\n(check_table_dims_v_var_dims) attempting to compare target_var dimension names with table variable entry dimension names...') - for proj_table_var_dim in table_var_dims: - print(f'-----------------------------------------------------------------------------------------------------------------------------------------') - print(f'(check_table_dims_v_var_dims) proj_table_var_dim = {proj_table_var_dim}') - - # check the mip coordinate file for accepted standard_name, out_name, long_name flavors - cmip_coords = None - try: - json_coordinate_config = str(Path(json_table_config).parent) + '/CMIP6_coordinate.json' - print(f'(check_table_dims_v_var_dims) json_coordinate_config = {json_coordinate_config}') - with open ( json_coordinate_config, 'r', encoding="utf-8") as json_cmip_coords: - cmip_coords = json.load( json_cmip_coords ) - except Exception as exc: - print(f'(check_table_dims_v_var_dims) trying to open MIP coordinate files... 
caught exception: exc = \n {exc}') - print(f' WARNING wont be making extra checks against accepted coordinat names...') - - dim_is_present=False - for var_dim in var_dims: - print(f'(check_table_dims_v_var_dims) var_dim = {var_dim}') - if var_dim == proj_table_var_dim: - print(f' required coordinate dimension {proj_table_var_dim} is present in the netcdf file') - dim_is_present=True - break - - if cmip_coords is not None: - print(f' did not find the exact coordinate name {proj_table_var_dim} is present in the netcdf file ') - print(f' .... checking standard/out/long names ....' ) - accepted_names = [ - cmip_coords['axis_entry'][proj_table_var_dim]['standard_name'], - cmip_coords['axis_entry'][proj_table_var_dim]['out_name'], - cmip_coords['axis_entry'][proj_table_var_dim]['long_name'] ] - print(f'(check_table_dims_v_var_dims) accepted_names = \n {accepted_names}') - if var_dim in accepted_names: - print(f' required coordinate dimension {proj_table_var_dim} is present in the netcdf file') - print(f' WARNING!!!! not an exact match for {proj_table_var_dim}.. it actually matches one of... {accepted_names}') - dim_is_present=True - break - - print(f'-----------------------------------------------------------------------------------------------------------------------------------------\n\n') - if not dim_is_present: - raise ValueError(f'(check_table_dims_v_var_dims)' - f' ERROR: dimension {proj_table_var_dim} is required for variable {target_var} / table {Path(json_table_config).name}') - print(f'(check_table_dims_v_var_dims) done comparing dimension names of between the table and the file\'s data.') - - def copy_nc(in_nc, out_nc): ''' copy target input netcdf file in_nc to target out_nc. 
I have to think this is not a trivial copy @@ -248,74 +193,23 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, ds = nc.Dataset(netcdf_file,'a') - # read what specific dimensions are req'd by the table for this variable - print(f'(rewrite_netcdf_file_var) attempting to read target_var dimension names FROM proj_table_vars...') - proj_table_var_dims = tuple( [ - var_dim for var_dim in proj_table_vars['variable_entry'][target_var]["dimensions"].split(' ') - ] ) - print(f' proj_table_var_dims = {proj_table_var_dims}') - print(f' type(proj_table_var_dims) = {type(proj_table_var_dims)}') - - - - - # read what specific dimensions are assoc with the variable (lat/lon/plev etc.) - print(f'(rewrite_netcdf_file_var) attempting to read target_var dimension names...') - var_dims = ds[target_var].dimensions - print(f' var_dims = {var_dims}') - - - - check_table_dims_v_var_dims( - json_table_config = json_table_config, - table_var_dims = ('latitude', 'time', 'plev19', 'longitude'),#proj_table_var_dims, - var_dims = var_dims - ) - # # check if the information req'd by the table is present in the list assoc with the variable - # print(f'(rewrite_netcdf_file_var) attempting to compare target_var dimension names with table variable entry dimension names...') - # for proj_table_var_dim in proj_table_var_dims: - # dim_is_present=False - # for var_dim in var_dims: - # if proj_table_var_dim == var_dim: - # print(f'required coordinate dimension {proj_table_var_dim} is present in the netcdf file') - # dim_is_present=True - # break - # - # if not dim_is_present: - # raise ValueError(f'(rewrite_netcdf_file_var) ERROR: dimension {proj_table_var_dim} is required for variable {target_var} / table {Path(json_table_config).name}') - # print(f'(rewrite_netcdf_file_var) done comparing dimension names of between the table and the file\'s data.') - - - # ocean grids are not implemented yet. 
print( '(rewrite_netcdf_file_var) checking input netcdf file for oceangrid condition') check_dataset_for_ocean_grid(ds) - # Attempt to read lat coordinates - print(f'(rewrite_netcdf_file_var) attempting to read coordinate(s), lat, lat_bnds') - lat, lat_bnds = None, None - try: - lat, lat_bnds = ds["lat"][:], ds["lat_bnds"][:] - except Exception as exc: - print(f'(rewrite_netcdf_file_var) WARNING could not read latitude coordinate. moving on.\n exc = {exc}') - print(f' lat = {lat}') - print(f' lat_bnds = {lat_bnds}') - pass - - # Attempt to read lon coordinates - print(f'(rewrite_netcdf_file_var) attempting to read coordinate(s), lon, lon_bnds') - lon, lon_bnds = None, None - try: - lon, lon_bnds = ds["lon"][:], ds["lon_bnds"][:] - except Exception as exc: - print(f'(rewrite_netcdf_file_var) WARNING could not read longitude coordinate. moving on.\n exc = {exc}') - print(f' lon = {lon}') - print(f' lon_bnds = {lon_bnds}') - pass + # figure out the dimension names programmatically TODO + # Define lat and lon dimensions + # Assume input file is lat/lon grid + lat = ds["lat"][:] + lon = ds["lon"][:] + lat_bnds = ds["lat_bnds"][:] + lon_bnds = ds["lon_bnds"][:] - # read in time_coords + units + ## Define time #time = ds["time"][:] + + # read in time_coords + units time_coords = ds["time"][:] time_coord_units = ds["time"].units print(f"(rewrite_netcdf_file_var) time_coord_units = {time_coord_units}") @@ -336,27 +230,21 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, vert_dim = get_vertical_dimension(ds, target_var) print(f"(rewrite_netcdf_file_var) Vertical dimension of {target_var}: {vert_dim}") + # grab var_dim + var_dim = len(var.shape) + print(f"(rewrite_netcdf_file_var) var_dim = {var_dim}, local_var = {local_var}") - # grab var_N_dims and check it's values - var_N_dims = len(var.shape) - print(f"(rewrite_netcdf_file_var) var_N_dims = {var_N_dims}, local_var = {local_var}") - if var_N_dims not in [3, 4]: - raise ValueError(f"var_N_dims == {var_N_dims} != 3 
nor 4. stop.") - - - print(f'(rewrite_netcdf_file_var) ASSERTING FALSE NOW') - raise Exception() + # Check var_dim + if var_dim not in [3, 4]: + raise ValueError(f"var_dim == {var_dim} != 3 nor 4. stop.") - - - - # Check var_N_dims and vert_dim and assign lev if relevant. - # error if vert_dim wrong given var_N_dims + # Check var_dim and vert_dim and assign lev if relevant. + # error if vert_dim wrong given var_dim lev = None - if var_N_dims == 4: + if var_dim == 4: if vert_dim not in [ "plev30", "plev19", "plev8", "height2m", "level", "lev", "levhalf"] : - raise ValueError(f'var_N_dims={var_N_dims}, vert_dim = {vert_dim} is not supported') + raise ValueError(f'var_dim={var_dim}, vert_dim = {vert_dim} is not supported') lev = ds[vert_dim] @@ -378,27 +266,11 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print(f"(rewrite_netcdf_file_var) cmor is opening json_table_config = {json_table_config}") cmor.load_table(json_table_config) - # read units units = proj_table_vars["variable_entry"] [target_var] ["units"] print(f"(rewrite_netcdf_file_var) units={units}") - # setup cmor latitude axis if relevant - print(f'(rewrite_netcdf_file_var) assigning cmor_lat') - cmor_lat = None - if any( [ lat is None, lat_bnds is None ] ): - print(f'(rewrite_netcdf_file_var) WARNING: lat or lat_bnds is None, skipping assigning cmor_lat') - else: - cmor_lat = cmor.axis("latitude", coord_vals = lat, cell_bounds = lat_bnds, units = "degrees_N") - - # setup cmor longitude axis if relevant - print(f'(rewrite_netcdf_file_var) assigning cmor_lon') - cmor_lon = None - if any( [ lon is None, lon_bnds is None ] ): - print(f'(rewrite_netcdf_file_var) WARNING: lon or lon_bnds is None, skipping assigning cmor_lon') - else: - cmor_lon = cmor.axis("longitude", coord_vals = lon, cell_bounds = lon_bnds, units = "degrees_E") - - # setup cmor_time axis if relevant + cmor_lat = cmor.axis("latitude", coord_vals = lat, cell_bounds = lat_bnds, units = "degrees_N") + cmor_lon = cmor.axis("longitude", 
coord_vals = lon, cell_bounds = lon_bnds, units = "degrees_E") try: print( f"(rewrite_netcdf_file_var) Executing cmor.axis('time', \n" f" coord_vals = \n{time_coords}, \n" @@ -418,27 +290,11 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, ips = None # set axes for 3-dim case - if var_N_dims == 3: - axes = [] - if cmor_time is not None: - axes.append(cmor_time) - else: - print(f'(rewrite_netcdf_file_var) WARNING: cmor_time is None!!! moving on...') - if cmor_lat is not None: - axes.append(cmor_lat) - else: - print(f'(rewrite_netcdf_file_var) WARNING: cmor_lat is None!!! moving on...') - if cmor_lon is not None: - axes.append(cmor_lon) - else: - print(f'(rewrite_netcdf_file_var) WARNING: cmor_lon is None!!! moving on...') + if var_dim == 3: + axes = [cmor_time, cmor_lat, cmor_lon] print(f"(rewrite_netcdf_file_var) axes = {axes}") - - #axes2 = [cmor_time, cmor_lat, cmor_lon] - #print(f"(rewrite_netcdf_file_var) axes2 = {axes2}") - # set axes for 4-dim case - elif var_N_dims == 4: + elif var_dim == 4: if vert_dim in ["plev30", "plev19", "plev8", "height2m"]: cmor_lev = cmor.axis( vert_dim, @@ -502,7 +358,6 @@ def rewrite_netcdf_file_var ( proj_table_vars = None, print(f"(rewrite_netcdf_file_var) positive = {positive}") cmor_var = cmor.variable(target_var, units, axes, positive = positive) - # Write the output to disk #var = ds[target_var][:] #was this ever needed? why? 
cmor.write(cmor_var, var) diff --git a/run_test_file_cases.py b/run_test_file_cases.py index 1518c266..a588f803 100644 --- a/run_test_file_cases.py +++ b/run_test_file_cases.py @@ -105,20 +105,19 @@ def run_cmor_RUN(filename, table, opt_var_name): # 4) FAIL (no longitude coordinate case) -# atmos, AERmonZ / ta +# atmos, Amoon / ta # just like #1, but lack longitude # Result - error, File "/home/Ian.Laflotte/Working/fre-cli/fre/cmor/cmor_mixer.py", line 195, in rewrite_netcdf_file_var lon = ds["lon"][:] File "src/netCDF4/_netCDF4.pyx", line 2519, in netCDF4._netCDF4.Dataset.__getitem__ IndexError: lon not found in / -testfile_atmos_gr1_AERmonZ_nolons = \ +testfile_atmos_gr1_AmonZ_nolons = \ '/arch0/cm6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos_plev39_cmip/ts/monthly/5yr/zonavg/atmos_plev39_cmip.201001-201412.ta.nc' try: - some_return = run_cmor_RUN(testfile_atmos_gr1_AERmonZ_nolons, 'AERmonZ', opt_var_name = 'ta') + some_return = run_cmor_RUN(testfile_atmos_gr1_AmonZ_nolons, 'Amon', opt_var_name = 'ta') except Exception as exc: print(f'exception caught: exc=\n{exc}') some_return=-1 pass -print_the_outcome(some_return,'atmos_gr1_AERmonZ_nolons / ta') +print_the_outcome(some_return,'atmos_gr1_AmonZ_nolons / ta') -sys.exit() ## 5) SUCCEEDS ## ocean, Omon / sos