diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 170272d2c..436b10c45 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,6 +1,17 @@
 All notable changes to this project will be documented in this file.
 We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
 
+## v4.5.6.1 - 2024-09-13 - [PR#1271](https://github.com/NOAA-OWP/inundation-mapping/pull/1271)
+
+Upgrade for `test_case_by_hydro_id.py` that enables it to run on HUCs with differing projections (e.g. Alaska) and adds a logging system.
+
+### Changes
+
+- `tools/test_case_by_hydro_id.py`: Moved the reprojection step to accommodate multiple input projections and cleaned up a substantial amount of unnecessary logic. Also added an optional logging system that is activated by the new `-l` flag.
+
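+Example usage with the new logging flag (invocation mirrors the module docstring added in this PR):
+
+`python /foss_fim/tools/test_case_by_hydro_id.py -b all -v fim_4_5_2_11 -g /outputs/fim_performance_v4_5_2_11.gpkg -l`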

+
+
 ## v4.5.6.0 - 2024-08-23 - [PR#1253](https://github.com/NOAA-OWP/inundation-mapping/pull/1253)
 
 Upgrades Python packages and dependencies and fixes backwards incompatibilities with new version of `geopandas`. Major changes include:
@@ -38,7 +49,7 @@ Updated the gauge crosswalk and SRC adjustment routine to use the ras2fim v2 fil
 - `src/src_adjust_ras2fim_rating.py`: Updated code logic to use the huc-specific input files containing the ras2fim rating curve data (previous ras2fim input file contained all hucs in one csv)
 - `src/utils/shared_functions.py`: Added function to find huc subdirectories with the same name btw two parent folders
 
-
+
 
 ## v4.5.4.4 - 2024-08-02 - [PR#1238](https://github.com/NOAA-OWP/inundation-mapping/pull/1238)
diff --git a/tools/test_case_by_hydro_id.py b/tools/test_case_by_hydro_id.py
index 5b24fdd3a..73186f33e 100644
--- a/tools/test_case_by_hydro_id.py
+++ b/tools/test_case_by_hydro_id.py
@@ -2,6 +2,7 @@
 import argparse
 import os
+import traceback
 from datetime import datetime
 
 import geopandas as gpd
@@ -10,10 +11,25 @@
 from run_test_case import Test_Case
 from shapely.validation import make_valid
 from tools_shared_functions import compute_stats_from_contingency_table
+from tqdm import tqdm
 
 gpd.options.io_engine = "pyogrio"
 
+"""
+This module uses zonal stats to subdivide alpha metrics by each HAND catchment.
+The output is a vector geopackage and is also known as the "FIM Performance" layer
+when loaded into HydroVIS. At the time of this commit, it takes approximately
+32 hours to complete.
+
+Example usage:
+python /foss_fim/tools/test_case_by_hydro_id.py \
+    -b all \
+    -v fim_4_5_2_11 \
+    -g /outputs/fim_performance_v4_5_2_11.gpkg \
+    -l
+"""
+
 
 #####################################################
 # Perform zonal stats is a function stored in pixel_counter.py.
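+# (Per the module docstring above, each call returns zonal pixel counts for one
+# agreement raster, tallied per catchment; an empty list means nothing could be
+# tallied, and the caller skips that raster.)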
@@ -180,39 +196,7 @@ def assemble_hydro_alpha_for_single_huc(stats, huc8, mag, bench):
     return in_mem_df
 
 
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Produces alpha metrics by hyrdoid.')
-
-    parser.add_argument(
-        '-b',
-        '--benchmark_category',
-        help='Choice of truth data. Options are: all, ble, ifc, nws, usgs, ras2fim',
-        required=True,
-    )
-    parser.add_argument(
-        '-v', '--version', help='The fim version to use. Should be similar to fim_3_0_24_14_ms', required=True
-    )
-    parser.add_argument(
-        '-g',
-        '--gpkg',
-        help='filepath and filename to hold exported gpkg (and csv) file. '
-        'Similar to /data/path/fim_performance_catchments.gpkg Need to use gpkg as output.',
-        required=True,
-    )
-
-    # Assign variables from arguments.
-    args = vars(parser.parse_args())
-    benchmark_category = args['benchmark_category']
-    version = args['version']
-    csv = args['gpkg']
-
-    print("================================")
-    print("Start test_case_by_hydroid.py")
-    start_time = datetime.now()
-    dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
-    print(f"started: {dt_string}")
-    print()
-
+def catchment_zonal_stats(benchmark_category, version, csv, log):
     # Execution code
     csv_output = gpd.GeoDataFrame(
         columns=[
@@ -237,92 +221,167 @@
             'geometry',
         ],
         geometry='geometry',
-    )
+    ).set_crs('EPSG:3857')
 
     # This function relies on the Test_Case class defined in run_test_case.py to list all available test cases
-    print('listing_test_cases_with_updates')
     all_test_cases = Test_Case.list_all_test_cases(
         version=version,
         archive=True,
         benchmark_categories=[] if benchmark_category == "all" else [benchmark_category],
     )
+    print(f'Found {len(all_test_cases)} test cases')
+    if log:
+        log.write(f'Found {len(all_test_cases)} test cases...\n')
+    missing_hucs = []
 
-    for test_case_class in all_test_cases:
+    for test_case_class in tqdm(all_test_cases, desc=f'Running {len(all_test_cases)} test cases'):
         if not os.path.exists(test_case_class.fim_dir):
             print(f'{test_case_class.fim_dir} does not exist')
+            missing_hucs.append(test_case_class)
+            if log:
+                log.write(f'{test_case_class.fim_dir} does not exist\n')
             continue
 
-        print(test_case_class.fim_dir, end='\r')
+        if log:
+            log.write(test_case_class.test_id + '\n')
 
         agreement_dict = test_case_class.get_current_agreements()
 
         for agree_rast in agreement_dict:
-            print(f'performing_zonal_stats for {agree_rast}')
-            branches_dir = os.path.join(test_case_class.fim_dir, 'branches')
-            for branches in os.listdir(branches_dir):
-                if branches != "0":
-                    continue
-                huc_gpkg = os.path.join(branches_dir, branches)
+            # We are only using branch 0 catchments to define boundaries for zonal stats
+            catchment_gpkg = os.path.join(
+                test_case_class.fim_dir,
+                'branches',
+                '0',
+                "gw_catchments_reaches_filtered_addedAttributes_crosswalked_0.gpkg",
+            )
 
-                string_manip = (
-                    "gw_catchments_reaches_filtered_addedAttributes_crosswalked_" + branches + ".gpkg"
-                )
+            define_mag = agree_rast.split(version)
+            define_mag_1 = define_mag[1].split('/')
+            mag = define_mag_1[1]
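+            # e.g. an agreement raster path ending in "<version>/100yr/..." gives
+            # define_mag[1] = "/100yr/...", so mag = "100yr" (path illustrative)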
 
-                huc_gpkg = os.path.join(huc_gpkg, string_manip)
+            if log:
+                log.write(f' {define_mag[1]}\n')
 
-                define_mag = agree_rast.split(version)
+            stats = perform_zonal_stats(catchment_gpkg, agree_rast)
+            if stats == []:
+                continue
 
-                define_mag_1 = define_mag[1].split('/')
+            get_geom = gpd.read_file(catchment_gpkg)
 
-                mag = define_mag_1[1]
+            get_geom['geometry'] = get_geom.apply(lambda row: make_valid(row.geometry), axis=1)
 
-                stats = perform_zonal_stats(huc_gpkg, agree_rast)
-                if stats == []:
-                    continue
+            in_mem_df = assemble_hydro_alpha_for_single_huc(
+                stats, test_case_class.huc, mag, test_case_class.benchmark_cat
+            )
 
-                print('assembling_hydroalpha_for_single_huc')
-                get_geom = gpd.read_file(huc_gpkg)
+            hydro_geom_df = get_geom[["HydroID", "geometry"]]
 
-                get_geom['geometry'] = get_geom.apply(lambda row: make_valid(row.geometry), axis=1)
+            geom_output = hydro_geom_df.merge(in_mem_df, on='HydroID', how='inner').to_crs('EPSG:3857')
 
-                in_mem_df = assemble_hydro_alpha_for_single_huc(
-                    stats, test_case_class.huc, mag, test_case_class.benchmark_cat
-                )
+            concat_df_list = [geom_output, csv_output]
 
-                hydro_geom_df = get_geom[["HydroID", "geometry"]]
+            csv_output = pd.concat(concat_df_list, sort=False)
 
-                geom_output = hydro_geom_df.merge(in_mem_df, on='HydroID', how='inner')
+    if missing_hucs and log:
+        log.write(
+            f"There were {len(missing_hucs)} HUCs missing from the input FIM version:\n"
+            + "\n".join([h.fim_dir for h in missing_hucs])
+        )
 
-                concat_df_list = [geom_output, csv_output]
+    print()
+    print(csv_output.groupby('BENCH').size())
+    print(f'total {len(csv_output)}')
+    if log:
+        log.write("\n------------------------------------\n")
+        csv_output.groupby('BENCH').size().to_string(log)
+        log.write(f'\ntotal {len(csv_output)}\n')
+
+    print('Writing to GPKG')
+    if log:
+        log.write(f'Writing geopackage {csv}\n')
+    csv_output.to_file(csv, driver="GPKG")
 
-                csv_output = pd.concat(concat_df_list, sort=False)
+    # Add version information to csv_output dataframe
+    csv_output['version'] = version
 
-    print('projecting to 3857')
-    csv_output = csv_output.to_crs('EPSG:3857')
+    print('Writing to CSV')
+    csv_path = csv.replace(".gpkg", ".csv")
+    if log:
+        log.write(f'Writing CSV {csv_path}\n')
+    csv_output.to_csv(csv_path)  # Save to CSV
 
-    print('manipulating the input string to exclude gpkg and include csv')
-    csv_path_list = csv.split(".")
-    csv_path = csv_path_list[0]
-    csv_path_dot = csv_path + ".csv"
 
-    print('writing_to_gpkg')
-    csv_output.to_file(csv, driver="GPKG")
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Produces alpha metrics by hydroid.')
 
-    # Add version information to csv_output dataframe
-    csv_output['version'] = version
+    parser.add_argument(
+        '-b',
+        '--benchmark_category',
+        help='Choice of truth data. Options are: all, ble, ifc, nws, usgs, ras2fim',
+        required=True,
+    )
+    parser.add_argument(
+        '-v', '--version', help='The fim version to use. Should be similar to fim_3_0_24_14_ms', required=True
+    )
+    parser.add_argument(
+        '-g',
+        '--gpkg',
+        help='Filepath and filename to hold exported gpkg file. '
+        'Similar to /data/path/fim_performance_catchments.gpkg. A CSV with the same name will also be written.',
+        required=True,
+    )
+    parser.add_argument(
+        '-l',
+        '--log',
+        help='Optional flag to write a log file with the same name as the --gpkg file.',
+        required=False,
+        default=None,
+        action='store_true',
+    )
 
-    print('writing_to_csv')
-    csv_output.to_csv(csv_path_dot)  # Save to CSV
+    # Assign variables from arguments.
+    args = vars(parser.parse_args())
+    benchmark_category = args['benchmark_category']
+    version = args['version']
+    csv = args['gpkg']
+    log = args['log']
 
     print("================================")
-    print("End test_case_by_hydroid.py")
+    print("Start test_case_by_hydroid.py")
+    start_time = datetime.now()
+    dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
+    print(f"started: {dt_string}")
+    print()
+
+    ## Initiate log file
+    if log:
+        log = open(csv.replace('.gpkg', '.log'), "w")
+        log.write('START TIME: ' + str(start_time) + '\n')
+        log.write('#########################################################\n\n')
+        log.write(f'Runtime args:\n {args}\n\n')
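+    # From here on, 'log' is either None (when -l is omitted) or an open file
+    # handle, so all log writes below are guarded with 'if log:'.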
 
+    # This is the main execution -- try block is to catch and log errors
+    try:
+        catchment_zonal_stats(benchmark_category, version, csv, log)
+    except Exception as ex:
+        print(f"ERROR: Execution failed. Please check the log file for details.\n {log.name if log else ''}")
+        if log:
+            log.write(f"ERROR -->\n{ex}")
+            traceback.print_exc(file=log)
+            log.write(f'Errored at: {str(datetime.now().strftime("%m/%d/%Y %H:%M:%S"))} \n')
 
     end_time = datetime.now()
-    dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
+    dt_string = end_time.strftime("%m/%d/%Y %H:%M:%S")
+    tot_run_time = end_time - start_time
+    if log:
+        log.write(f'END TIME: {str(end_time)} \n')
+        log.write(f'TOTAL RUN TIME: {str(tot_run_time)} \n')
+        log.close()
+
+    print("================================")
+    print("End test_case_by_hydroid.py")
+    print(f"ended: {dt_string}")
 
-    # calculate duration
-    time_duration = end_time - start_time
-    print(f"Duration: {str(time_duration).split('.')[0]}")
+    print(f"Duration: {str(tot_run_time).split('.')[0]}")
     print()