diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 170272d2c..436b10c45 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,6 +1,17 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.
+## v4.5.6.1 - 2024-09-13 - [PR#1271](https://github.com/NOAA-OWP/inundation-mapping/pull/1271)
+
+Upgrades `test_case_by_hydro_id.py` so it can run on HUCs with differing projections (e.g. Alaska) and adds an optional logging system.
+
+### Changes
+
+- `tools/test_case_by_hydro_id.py`: Moved the reprojection step to accommodate multiple input projections and removed unnecessary logic. Also added an optional logging system, activated by the new `-l` flag (see the example below).
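+
+Example usage with the new `-l` logging flag (command taken from the module docstring):
+
+```
+python /foss_fim/tools/test_case_by_hydro_id.py -b all -v fim_4_5_2_11 -g /outputs/fim_performance_v4_5_2_11.gpkg -l
+```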
+
+
+
+
## v4.5.6.0 - 2024-08-23 - [PR#1253](https://github.com/NOAA-OWP/inundation-mapping/pull/1253)
Upgrades Python packages and dependencies and fixes backwards incompatibilities with new version of `geopandas`. Major changes include:
@@ -38,7 +49,7 @@ Updated the gauge crosswalk and SRC adjustment routine to use the ras2fim v2 fil
- `src/src_adjust_ras2fim_rating.py`: Updated code logic to use the huc-specific input files containing the ras2fim rating curve data (previous ras2fim input file contained all hucs in one csv)
- `src/utils/shared_functions.py`: Added function to find huc subdirectories with the same name btw two parent folders
-
+
## v4.5.4.4 - 2024-08-02 - [PR#1238](https://github.com/NOAA-OWP/inundation-mapping/pull/1238)
diff --git a/tools/test_case_by_hydro_id.py b/tools/test_case_by_hydro_id.py
index 5b24fdd3a..73186f33e 100644
--- a/tools/test_case_by_hydro_id.py
+++ b/tools/test_case_by_hydro_id.py
@@ -2,6 +2,7 @@
import argparse
import os
+import traceback
from datetime import datetime
import geopandas as gpd
@@ -10,10 +11,25 @@
from run_test_case import Test_Case
from shapely.validation import make_valid
from tools_shared_functions import compute_stats_from_contingency_table
+from tqdm import tqdm
gpd.options.io_engine = "pyogrio"
+"""
+This module uses zonal stats to subdivide alpha metrics by each HAND catchment.
+The output is a vector geopackage and is also known as the "FIM Performance" layer
+when loaded into HydroVIS. At the time of this commit, it takes approximately
+32 hours to complete.
+
+Example usage:
+python /foss_fim/tools/test_case_by_hydro_id.py \
+ -b all \
+ -v fim_4_5_2_11 \
+ -g /outputs/fim_performance_v4_5_2_11.gpkg \
+ -l
+"""
+
#####################################################
# perform_zonal_stats is a function stored in pixel_counter.py.
@@ -180,39 +196,7 @@ def assemble_hydro_alpha_for_single_huc(stats, huc8, mag, bench):
return in_mem_df
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description='Produces alpha metrics by hyrdoid.')
-
- parser.add_argument(
- '-b',
- '--benchmark_category',
- help='Choice of truth data. Options are: all, ble, ifc, nws, usgs, ras2fim',
- required=True,
- )
- parser.add_argument(
- '-v', '--version', help='The fim version to use. Should be similar to fim_3_0_24_14_ms', required=True
- )
- parser.add_argument(
- '-g',
- '--gpkg',
- help='filepath and filename to hold exported gpkg (and csv) file. '
- 'Similar to /data/path/fim_performance_catchments.gpkg Need to use gpkg as output.',
- required=True,
- )
-
- # Assign variables from arguments.
- args = vars(parser.parse_args())
- benchmark_category = args['benchmark_category']
- version = args['version']
- csv = args['gpkg']
-
- print("================================")
- print("Start test_case_by_hydroid.py")
- start_time = datetime.now()
- dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
- print(f"started: {dt_string}")
- print()
-
+def catchment_zonal_stats(benchmark_category, version, csv, log):
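+    """
+    Compute alpha metrics for each HAND catchment via zonal stats over every requested
+    test case, then write the combined results to a geopackage and a CSV.
+
+    benchmark_category -- truth data source: all, ble, ifc, nws, usgs, or ras2fim
+    version            -- FIM version to evaluate, e.g. fim_4_5_2_11
+    csv                -- path of the output .gpkg; the .csv path is derived from it
+    log                -- an open, writable log file, or None to disable logging
+    """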
# Execution code
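+    # Start with an empty accumulator GeoDataFrame pinned to EPSG:3857 (web mercator);
+    # each HUC's output is reprojected to match before being concatenated into it.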
csv_output = gpd.GeoDataFrame(
columns=[
@@ -237,92 +221,167 @@ def assemble_hydro_alpha_for_single_huc(stats, huc8, mag, bench):
'geometry',
],
geometry='geometry',
- )
+ ).set_crs('EPSG:3857')
# This function relies on the Test_Case class defined in run_test_case.py to list all available test cases
- print('listing_test_cases_with_updates')
all_test_cases = Test_Case.list_all_test_cases(
version=version,
archive=True,
benchmark_categories=[] if benchmark_category == "all" else [benchmark_category],
)
+ print(f'Found {len(all_test_cases)} test cases')
+ if log:
+ log.write(f'Found {len(all_test_cases)} test cases...\n')
+ missing_hucs = []
- for test_case_class in all_test_cases:
+ for test_case_class in tqdm(all_test_cases, desc=f'Running {len(all_test_cases)} test cases'):
if not os.path.exists(test_case_class.fim_dir):
print(f'{test_case_class.fim_dir} does not exist')
+ missing_hucs.append(test_case_class)
+ if log:
+ log.write(f'{test_case_class.fim_dir} does not exist\n')
continue
- print(test_case_class.fim_dir, end='\r')
+ if log:
+ log.write(test_case_class.test_id + '\n')
agreement_dict = test_case_class.get_current_agreements()
for agree_rast in agreement_dict:
- print(f'performing_zonal_stats for {agree_rast}')
- branches_dir = os.path.join(test_case_class.fim_dir, 'branches')
- for branches in os.listdir(branches_dir):
- if branches != "0":
- continue
- huc_gpkg = os.path.join(branches_dir, branches)
+ # We are only using branch 0 catchments to define boundaries for zonal stats
+ catchment_gpkg = os.path.join(
+ test_case_class.fim_dir,
+                'branches',
+                '0',
+                "gw_catchments_reaches_filtered_addedAttributes_crosswalked_0.gpkg",
+ )
- string_manip = (
- "gw_catchments_reaches_filtered_addedAttributes_crosswalked_" + branches + ".gpkg"
- )
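+            # Recover the magnitude directory name (e.g. "100yr" or "action") by
+            # splitting the agreement raster path on the version string.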
+ define_mag = agree_rast.split(version)
+ define_mag_1 = define_mag[1].split('/')
+ mag = define_mag_1[1]
- huc_gpkg = os.path.join(huc_gpkg, string_manip)
+ if log:
+ log.write(f' {define_mag[1]}\n')
- define_mag = agree_rast.split(version)
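+            # Tally agreement-raster pixel counts within each branch-0 catchment;
+            # these counts feed the contingency-table stats assembled below.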
+ stats = perform_zonal_stats(catchment_gpkg, agree_rast)
+ if stats == []:
+ continue
- define_mag_1 = define_mag[1].split('/')
+ get_geom = gpd.read_file(catchment_gpkg)
- mag = define_mag_1[1]
+ get_geom['geometry'] = get_geom.apply(lambda row: make_valid(row.geometry), axis=1)
- stats = perform_zonal_stats(huc_gpkg, agree_rast)
- if stats == []:
- continue
+ in_mem_df = assemble_hydro_alpha_for_single_huc(
+ stats, test_case_class.huc, mag, test_case_class.benchmark_cat
+ )
- print('assembling_hydroalpha_for_single_huc')
- get_geom = gpd.read_file(huc_gpkg)
+ hydro_geom_df = get_geom[["HydroID", "geometry"]]
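+            # Reproject each HUC's merged output to EPSG:3857 here, per HUC rather
+            # than once on the combined frame, so HUCs with differing native
+            # projections (e.g. Alaska) can be concatenated safely.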
- get_geom['geometry'] = get_geom.apply(lambda row: make_valid(row.geometry), axis=1)
+ geom_output = hydro_geom_df.merge(in_mem_df, on='HydroID', how='inner').to_crs('EPSG:3857')
- in_mem_df = assemble_hydro_alpha_for_single_huc(
- stats, test_case_class.huc, mag, test_case_class.benchmark_cat
- )
+ concat_df_list = [geom_output, csv_output]
- hydro_geom_df = get_geom[["HydroID", "geometry"]]
+ csv_output = pd.concat(concat_df_list, sort=False)
- geom_output = hydro_geom_df.merge(in_mem_df, on='HydroID', how='inner')
+    if missing_hucs and log:
+ log.write(
+ f"There were {len(missing_hucs)} HUCs missing from the input FIM version:\n"
+ + "\n".join([h.fim_dir for h in missing_hucs])
+ )
- concat_df_list = [geom_output, csv_output]
+ print()
+ print(csv_output.groupby('BENCH').size())
+ print(f'total {len(csv_output)}')
+    if log:
+        log.write("\n------------------------------------\n")
+        csv_output.groupby('BENCH').size().to_string(log)
+        log.write(f'\ntotal {len(csv_output)}\n')
+
+ print('Writing to GPKG')
+    if log:
+        log.write(f'Writing geopackage {csv}\n')
+ csv_output.to_file(csv, driver="GPKG")
- csv_output = pd.concat(concat_df_list, sort=False)
+    # Add version information to csv_output dataframe (CSV only; the GPKG above has already been written)
+ csv_output['version'] = version
- print('projecting to 3857')
- csv_output = csv_output.to_crs('EPSG:3857')
+ print('Writing to CSV')
+ csv_path = csv.replace(".gpkg", ".csv")
+    if log:
+        log.write(f'Writing CSV {csv_path}\n')
+ csv_output.to_csv(csv_path) # Save to CSV
- print('manipulating the input string to exclude gpkg and include csv')
- csv_path_list = csv.split(".")
- csv_path = csv_path_list[0]
- csv_path_dot = csv_path + ".csv"
- print('writing_to_gpkg')
- csv_output.to_file(csv, driver="GPKG")
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Produces alpha metrics by HydroID.')
- # Add version information to csv_output dataframe
- csv_output['version'] = version
+ parser.add_argument(
+ '-b',
+ '--benchmark_category',
+ help='Choice of truth data. Options are: all, ble, ifc, nws, usgs, ras2fim',
+ required=True,
+ )
+ parser.add_argument(
+ '-v', '--version', help='The fim version to use. Should be similar to fim_3_0_24_14_ms', required=True
+ )
+ parser.add_argument(
+ '-g',
+ '--gpkg',
+ help='Filepath and filename to hold exported gpkg file. '
+ 'Similar to /data/path/fim_performance_catchments.gpkg. A CSV with the same name will also be written.',
+ required=True,
+ )
+ parser.add_argument(
+ '-l',
+ '--log',
+        help='Optional flag to write a log file with the same name as the --gpkg output.',
+ required=False,
+ default=None,
+ action='store_true',
+ )
- print('writing_to_csv')
- csv_output.to_csv(csv_path_dot) # Save to CSV
+ # Assign variables from arguments.
+ args = vars(parser.parse_args())
+ benchmark_category = args['benchmark_category']
+ version = args['version']
+ csv = args['gpkg']
+ log = args['log']
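+    # With action='store_true' and default=None, log is None unless -l was passed;
+    # if it was, the True value is replaced with an open file handle below.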
print("================================")
- print("End test_case_by_hydroid.py")
+    print("Start test_case_by_hydro_id.py")
+ start_time = datetime.now()
+ dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
+ print(f"started: {dt_string}")
+ print()
+
+ ## Initiate log file
+ if log:
+ log = open(csv.replace('.gpkg', '.log'), "w")
+ log.write('START TIME: ' + str(start_time) + '\n')
+        log.write('#########################################################\n\n')
+ log.write(f'Runtime args:\n {args}\n\n')
+
+    # This is the main execution -- the try/except below catches and logs errors
+ try:
+ catchment_zonal_stats(benchmark_category, version, csv, log)
+ except Exception as ex:
+        if log:
+            print(f"ERROR: Execution failed. Please check the log file for details: \n {log.name}")
+            log.write(f"ERROR -->\n{ex}\n")
+            traceback.print_exc(file=log)
+            log.write(f'Errored at: {str(datetime.now().strftime("%m/%d/%Y %H:%M:%S"))} \n')
+        else:
+            print(f"ERROR: Execution failed.\n{ex}")
+            traceback.print_exc()
end_time = datetime.now()
- dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
+ dt_string = end_time.strftime("%m/%d/%Y %H:%M:%S")
+ tot_run_time = end_time - start_time
+ if log:
+ log.write(f'END TIME: {str(end_time)} \n')
+ log.write(f'TOTAL RUN TIME: {str(tot_run_time)} \n')
+ log.close()
+
+ print("================================")
+    print("End test_case_by_hydro_id.py")
+
print(f"ended: {dt_string}")
- # calculate duration
- time_duration = end_time - start_time
- print(f"Duration: {str(time_duration).split('.')[0]}")
+ print(f"Duration: {str(tot_run_time).split('.')[0]}")
print()