13 changes: 12 additions & 1 deletion docs/CHANGELOG.md
@@ -1,6 +1,17 @@
All notable changes to this project will be documented in this file.
We follow the [Semantic Versioning 2.0.0](http://semver.org/) format.

## v4.5.6.1 - 2024-09-13 - [PR#1271](https://github.com/NOAA-OWP/inundation-mapping/pull/1271)

Upgrades `test_case_by_hydro_id.py` to run on HUCs with differing projections (e.g. Alaska) and adds a logging system.

### Changes

- `tools/test_case_by_hydro_id.py`: Moved the reprojection step to accommodate multiple input projections and cleaned up unnecessary logic. Also added an optional logging system, activated by the new `-l` flag (see the sketch below).
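
  A minimal sketch of the reprojection pattern, with hypothetical names (an illustration of the approach, not the PR's exact code):

  ```python
  import geopandas as gpd
  import rasterio


  def load_catchments_in_raster_crs(catchment_gpkg, agreement_raster):
      # Reproject the catchments to the agreement raster's CRS before zonal
      # stats so HUCs in other projections (e.g. Alaska) line up correctly.
      catchments = gpd.read_file(catchment_gpkg)
      with rasterio.open(agreement_raster) as src:
          raster_crs = src.crs
      return catchments.to_crs(raster_crs)
  ```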

<br/><br/>


## v4.5.6.0 - 2024-08-23 - [PR#1253](https://github.com/NOAA-OWP/inundation-mapping/pull/1253)

Upgrades Python packages and dependencies and fixes backwards incompatibilities with new version of `geopandas`. Major changes include:
@@ -38,7 +49,7 @@ Updated the gauge crosswalk and SRC adjustment routine to use the ras2fim v2 fil
- `src/src_adjust_ras2fim_rating.py`: Updated code logic to use the HUC-specific input files containing the ras2fim rating curve data (the previous ras2fim input file contained all HUCs in one CSV)
- `src/utils/shared_functions.py`: Added a function to find HUC subdirectories with the same name between two parent folders (see the sketch below)
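
  A hedged sketch of that helper's idea (names hypothetical, not the actual `shared_functions.py` code):

  ```python
  import os


  def common_huc_subdirs(parent_a, parent_b):
      # HUC subdirectories that exist, by name, under both parent folders
      subdirs_a = {d for d in os.listdir(parent_a) if os.path.isdir(os.path.join(parent_a, d))}
      subdirs_b = {d for d in os.listdir(parent_b) if os.path.isdir(os.path.join(parent_b, d))}
      return sorted(subdirs_a & subdirs_b)
  ```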

<br/><br/>
<br/><br/>

## v4.5.4.4 - 2024-08-02 - [PR#1238](https://github.com/NOAA-OWP/inundation-mapping/pull/1238)

219 changes: 139 additions & 80 deletions tools/test_case_by_hydro_id.py
@@ -2,6 +2,7 @@

import argparse
import os
import traceback
from datetime import datetime

import geopandas as gpd
@@ -10,10 +11,25 @@
from run_test_case import Test_Case
from shapely.validation import make_valid
from tools_shared_functions import compute_stats_from_contingency_table
from tqdm import tqdm


gpd.options.io_engine = "pyogrio"

"""
This module uses zonal stats to subdivide alpha metrics by each HAND catchment.
The output is a vector geopackage and is also known as the "FIM Performance" layer
when loaded into HydroVIS. At the time of this commit, it takes approximately
32 hours to complete.

Example usage:
python /foss_fim/tools/test_case_by_hydro_id.py \
-b all \
-v fim_4_5_2_11 \
-g /outputs/fim_performance_v4_5_2_11.gpkg \
-l
"""


#####################################################
# perform_zonal_stats is a function stored in pixel_counter.py.
@@ -180,39 +196,7 @@ def assemble_hydro_alpha_for_single_huc(stats, huc8, mag, bench):
return in_mem_df


if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Produces alpha metrics by HydroID.')

parser.add_argument(
'-b',
'--benchmark_category',
help='Choice of truth data. Options are: all, ble, ifc, nws, usgs, ras2fim',
required=True,
)
parser.add_argument(
'-v', '--version', help='The fim version to use. Should be similar to fim_3_0_24_14_ms', required=True
)
parser.add_argument(
'-g',
'--gpkg',
help='filepath and filename to hold exported gpkg (and csv) file. '
'Similar to /data/path/fim_performance_catchments.gpkg Need to use gpkg as output.',
required=True,
)

# Assign variables from arguments.
args = vars(parser.parse_args())
benchmark_category = args['benchmark_category']
version = args['version']
csv = args['gpkg']

print("================================")
print("Start test_case_by_hydroid.py")
start_time = datetime.now()
dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
print(f"started: {dt_string}")
print()

def catchment_zonal_stats(benchmark_category, version, csv, log):
# Execution code
csv_output = gpd.GeoDataFrame(
columns=[
@@ -237,92 +221,167 @@
'geometry',
],
geometry='geometry',
)
).set_crs('EPSG:3857')
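# Assigning EPSG:3857 (Web Mercator) up front keeps the empty frame consistent with the reprojected per-HUC outputs concatenated into it below.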

# This function relies on the Test_Case class defined in run_test_case.py to list all available test cases
print('listing_test_cases_with_updates')
all_test_cases = Test_Case.list_all_test_cases(
version=version,
archive=True,
benchmark_categories=[] if benchmark_category == "all" else [benchmark_category],
)
print(f'Found {len(all_test_cases)} test cases')
if log:
log.write(f'Found {len(all_test_cases)} test cases...\n')
missing_hucs = []

for test_case_class in all_test_cases:
for test_case_class in tqdm(all_test_cases, desc=f'Running {len(all_test_cases)} test cases'):
if not os.path.exists(test_case_class.fim_dir):
print(f'{test_case_class.fim_dir} does not exist')
missing_hucs.append(test_case_class)
if log:
log.write(f'{test_case_class.fim_dir} does not exist\n')
continue

print(test_case_class.fim_dir, end='\r')
if log:
log.write(test_case_class.test_id + '\n')

agreement_dict = test_case_class.get_current_agreements()

for agree_rast in agreement_dict:
print(f'performing_zonal_stats for {agree_rast}')

branches_dir = os.path.join(test_case_class.fim_dir, 'branches')
for branches in os.listdir(branches_dir):
if branches != "0":
continue
huc_gpkg = os.path.join(branches_dir, branches)
# We are only using branch 0 catchments to define boundaries for zonal stats
catchment_gpkg = os.path.join(
test_case_class.fim_dir,
'branches',
"gw_catchments_reaches_filtered_addedAttributes_crosswalked_0.gpkg",
)

string_manip = (
"gw_catchments_reaches_filtered_addedAttributes_crosswalked_" + branches + ".gpkg"
)
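# The path segment immediately after the version directory names the benchmark magnitude (e.g. '100yr' or 'action'), parsed from the agreement raster path below.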
define_mag = agree_rast.split(version)
define_mag_1 = define_mag[1].split('/')
mag = define_mag_1[1]

huc_gpkg = os.path.join(huc_gpkg, string_manip)
if log:
log.write(f' {define_mag[1]}\n')

define_mag = agree_rast.split(version)
stats = perform_zonal_stats(catchment_gpkg, agree_rast)
if stats == []:
continue

define_mag_1 = define_mag[1].split('/')
get_geom = gpd.read_file(catchment_gpkg)

mag = define_mag_1[1]
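# make_valid repairs invalid catchment geometries (e.g. self-intersections) so the merge and file writes below do not fail.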
get_geom['geometry'] = get_geom.apply(lambda row: make_valid(row.geometry), axis=1)

stats = perform_zonal_stats(huc_gpkg, agree_rast)
if stats == []:
continue
in_mem_df = assemble_hydro_alpha_for_single_huc(
stats, test_case_class.huc, mag, test_case_class.benchmark_cat
)

print('assembling_hydroalpha_for_single_huc')
get_geom = gpd.read_file(huc_gpkg)
hydro_geom_df = get_geom[["HydroID", "geometry"]]

get_geom['geometry'] = get_geom.apply(lambda row: make_valid(row.geometry), axis=1)
geom_output = hydro_geom_df.merge(in_mem_df, on='HydroID', how='inner').to_crs('EPSG:3857')

in_mem_df = assemble_hydro_alpha_for_single_huc(
stats, test_case_class.huc, mag, test_case_class.benchmark_cat
)
concat_df_list = [geom_output, csv_output]

hydro_geom_df = get_geom[["HydroID", "geometry"]]
csv_output = pd.concat(concat_df_list, sort=False)

geom_output = hydro_geom_df.merge(in_mem_df, on='HydroID', how='inner')
if missing_hucs and log:
log.write(
f"There were {len(missing_hucs)} HUCs missing from the input FIM version:\n"
+ "\n".join([h.fim_dir for h in missing_hucs])
)

concat_df_list = [geom_output, csv_output]
print()
print(csv_output.groupby('BENCH').size())
print(f'total {len(csv_output)}')
log.write("\n------------------------------------\n")
csv_output.groupby('BENCH').size().to_string(log)
log.write(f'\ntotal {len(csv_output)}\n')

print('Writing to GPKG')
if log:
log.write(f'Writing geopackage {csv}\n')
csv_output.to_file(csv, driver="GPKG")

csv_output = pd.concat(concat_df_list, sort=False)
# Add version information to csv_output dataframe
csv_output['version'] = version

print('projecting to 3857')
csv_output = csv_output.to_crs('EPSG:3857')
print('Writing to CSV')
csv_path = csv.replace(".gpkg", ".csv")
if log:
log.write(f'Writing CSV {csv_path}\n')
csv_output.to_csv(csv_path) # Save to CSV

print('manipulating the input string to exclude gpkg and include csv')
csv_path_list = csv.split(".")
csv_path = csv_path_list[0]
csv_path_dot = csv_path + ".csv"

print('writing_to_gpkg')
csv_output.to_file(csv, driver="GPKG")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Produces alpha metrics by HydroID.')

# Add version information to csv_output dataframe
csv_output['version'] = version
parser.add_argument(
'-b',
'--benchmark_category',
help='Choice of truth data. Options are: all, ble, ifc, nws, usgs, ras2fim',
required=True,
)
parser.add_argument(
'-v', '--version', help='The fim version to use. Should be similar to fim_3_0_24_14_ms', required=True
)
parser.add_argument(
'-g',
'--gpkg',
help='Filepath and filename to hold exported gpkg file. '
'Similar to /data/path/fim_performance_catchments.gpkg. A CSV with the same name will also be written.',
required=True,
)
parser.add_argument(
'-l',
'--log',
help='Optional flag to write a log file with the same name as the --gpkg output.',
required=False,
default=None,
action='store_true',
)

print('writing_to_csv')
csv_output.to_csv(csv_path_dot) # Save to CSV
# Assign variables from arguments.
args = vars(parser.parse_args())
benchmark_category = args['benchmark_category']
version = args['version']
csv = args['gpkg']
log = args['log']
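# At this point 'log' is just a store_true flag; when set, it is rebound below to an open file handle that the rest of the script writes to.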

print("================================")
print("End test_case_by_hydroid.py")
print("Start test_case_by_hydroid.py")
start_time = datetime.now()
dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
print(f"started: {dt_string}")
print()

## Initiate log file
if log:
log = open(csv.replace('.gpkg', '.log'), "w")
log.write('START TIME: ' + str(start_time) + '\n')
log.write('#########################################################\n\n')
log.write('')
log.write(f'Runtime args:\n {args}\n\n')

# This is the main execution -- try block is to catch and log errors
try:
catchment_zonal_stats(benchmark_category, version, csv, log)
except Exception as ex:
print(f"ERROR: Execution failed. Please check the log file for details. \n {log.name if log else ''}")
if log:
log.write(f"ERROR -->\n{ex}")
traceback.print_exc(file=log)
log.write(f'Errored at: {str(datetime.now().strftime("%m/%d/%Y %H:%M:%S"))} \n')

end_time = datetime.now()
dt_string = datetime.now().strftime("%m/%d/%Y %H:%M:%S")
dt_string = end_time.strftime("%m/%d/%Y %H:%M:%S")
tot_run_time = end_time - start_time
if log:
log.write(f'END TIME: {str(end_time)} \n')
log.write(f'TOTAL RUN TIME: {str(tot_run_time)} \n')
log.close()

print("================================")
print("End test_case_by_hydroid.py")

print(f"ended: {dt_string}")

# calculate duration
time_duration = end_time - start_time
print(f"Duration: {str(time_duration).split('.')[0]}")
print(f"Duration: {str(tot_run_time).split('.')[0]}")
print()