-
Notifications
You must be signed in to change notification settings - Fork 1
DAS-2418: Add exclusions for SMAP L3 string variables #33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
89a158a
fcd2987
b099eb7
dd1fcc8
69d3374
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1 @@ | ||
| 1.4.0 | ||
| 1.5.0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,3 +4,4 @@ earthdata-varinfo ~= 3.0.2 | |
| harmony-service-lib ~= 2.5.0 | ||
| netCDF4 ~= 1.6.5 | ||
| xarray == 2025.9.0 | ||
| h5netcdf ~= 1.6.4 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -394,3 +394,79 @@ def sample_varinfo_test04( | |
| return VarInfoFromNetCDF4( | ||
| sample_netcdf4_file, config_file=varinfo_config_file, short_name='TEST04' | ||
| ) | ||
|
|
||
|
|
||
| @fixture(scope='function') | ||
| def sample_netcdf4_file_test05(temp_dir) -> str: | ||
| """Create a sample NetCDF-4 file for testing excluding variables.""" | ||
| file_name = path_join(temp_dir, 'test_input_05.nc') | ||
|
|
||
| sample_datatree = xr.DataTree( | ||
| xr.Dataset( | ||
| data_vars={ | ||
| 'string_time_utc_seconds': xr.DataArray(np.ones((3, 3))), | ||
|
||
| 'string_time_seconds': xr.DataArray(np.ones((3, 3))), | ||
| }, | ||
| ) | ||
| ) | ||
|
|
||
| sample_datatree['/sub_group'] = xr.Dataset( | ||
| data_vars={ | ||
| 'string_time_utc_seconds': xr.DataArray(np.ones((3, 3))), | ||
| 'string_time_seconds': xr.DataArray(np.ones((3, 3))), | ||
| }, | ||
| ) | ||
|
|
||
| sample_datatree['/sub_group/nested_group'] = xr.Dataset( | ||
| data_vars={ | ||
| 'string_time_utc_seconds': xr.DataArray(np.ones((3, 3))), | ||
| 'string_time_seconds': xr.DataArray(np.ones((3, 3))), | ||
| }, | ||
| ) | ||
|
|
||
| sample_datatree.to_netcdf(file_name, encoding=None) | ||
| return file_name | ||
|
|
||
|
|
||
| @fixture(scope='function') | ||
| def expected_output_netcdf4_file_test05(temp_dir) -> str: | ||
| """Create a sample NetCDF-4 file for testing excluding variables. | ||
|
|
||
| The generated file omits the 'string_time_utc_seconds' variable from each group. | ||
| This ensures that the metadata annotator correctly excludes these variables | ||
| from its output during testing. | ||
| """ | ||
| file_name = path_join(temp_dir, 'test_input_05.nc') | ||
|
|
||
| sample_datatree = xr.DataTree( | ||
| xr.Dataset( | ||
| data_vars={ | ||
| 'string_time_seconds': xr.DataArray(np.ones((3, 3))), | ||
| }, | ||
| ) | ||
| ) | ||
|
|
||
| sample_datatree['/sub_group'] = xr.Dataset( | ||
| data_vars={ | ||
| 'string_time_seconds': xr.DataArray(np.ones((3, 3))), | ||
| }, | ||
| ) | ||
|
|
||
| sample_datatree['/sub_group/nested_group'] = xr.Dataset( | ||
| data_vars={ | ||
| 'string_time_seconds': xr.DataArray(np.ones((3, 3))), | ||
| }, | ||
| ) | ||
|
|
||
| sample_datatree.to_netcdf(file_name, encoding=None) | ||
| return file_name | ||
|
|
||
|
|
||
| @fixture(scope='function') | ||
| def sample_varinfo_test05( | ||
| sample_netcdf4_file_test05, varinfo_config_file | ||
| ) -> VarInfoFromNetCDF4: | ||
| """Create sample VarInfoFromNetCDF4 instance.""" | ||
| return VarInfoFromNetCDF4( | ||
| sample_netcdf4_file_test05, config_file=varinfo_config_file, short_name='TEST05' | ||
| ) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just commenting on this file as a whole - I never thought about creating a test config file like this, kinda cool! I suppose I wonder if it might be an issue that we're not directly testing the changes to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While I think both approaches are okay, the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, okay got it! And that makes sense especially for the metadata annotator. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, do you think it'd be worthwhile to test the specific SPL3FTA configuration, or nah? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The precedent set in this repository has been to avoid tests for specific collection configuration rules. I think this is mainly because it would inflate the test suite (would require tests for every collection that is configured) and we'd also need to store granules for these tests in the repository. We've been reliant on properly testing these collection specific configuration rules in the test instructions for the PRs that add them. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,7 @@ | |
| from metadata_annotator.annotate import ( | ||
| annotate_granule, | ||
| create_new_variable, | ||
| delete_variable, | ||
| get_dimension_variables, | ||
| get_geotransform_config, | ||
| get_grid_start_index, | ||
|
|
@@ -19,7 +20,9 @@ | |
| get_spatial_dimension_type, | ||
| get_spatial_dimension_variables, | ||
| get_start_index_from_row_col_variable, | ||
| get_variables_to_delete, | ||
| is_exact_path, | ||
| is_excluded_science_variable, | ||
| is_temporary_attribute, | ||
| update_dimension_names, | ||
| update_dimension_variable_attributes, | ||
|
|
@@ -250,6 +253,31 @@ def test_annotate_granule_no_changes( | |
| assert results_datatree.identical(expected_datatree) | ||
|
|
||
|
|
||
| def test_annotate_granule_variable_exclusions_only( | ||
| sample_netcdf4_file_test05, | ||
| expected_output_netcdf4_file_test05, | ||
| temp_output_file_path, | ||
| varinfo_config_file, | ||
| mocker, | ||
| ): | ||
| """Confirm that a granule with only variable exclusion configuration is updated.""" | ||
| _ = mocker.patch('metadata_annotator.annotate.update_history_metadata') | ||
| annotate_granule( | ||
| sample_netcdf4_file_test05, | ||
| temp_output_file_path, | ||
| varinfo_config_file, | ||
| 'TEST05', | ||
| ) | ||
|
|
||
| with ( | ||
| xr.open_datatree( | ||
| expected_output_netcdf4_file_test05, decode_times=False | ||
| ) as expected_datatree, | ||
| xr.open_datatree(temp_output_file_path, decode_times=False) as results_datatree, | ||
| ): | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The first time I really hate black's formatting. 😬 |
||
| assert results_datatree.identical(expected_datatree) | ||
|
|
||
|
|
||
| def test_annotate_granule_with_dimension_variable_updates(temp_output_file_path): | ||
| """Confirm that a granule has all metadata updated as expected. | ||
|
|
||
|
|
@@ -716,3 +744,65 @@ def test_update_dimension_variables(sample_netcdf4_file_test04, sample_varinfo_t | |
| assert np.allclose( | ||
| test_datatree['sub_group'].dataset['y'], expected_y_values | ||
| ) | ||
|
|
||
|
|
||
| def test_get_variables_to_delete(sample_varinfo_test05): | ||
| """Ensure correct list of variables to delete is obtained.""" | ||
| expected_result = set( | ||
| [ | ||
| '/string_time_utc_seconds', | ||
| '/sub_group/string_time_utc_seconds', | ||
| '/sub_group/nested_group/string_time_utc_seconds', | ||
| ] | ||
| ) | ||
| assert set(get_variables_to_delete(sample_varinfo_test05)) == expected_result | ||
|
|
||
|
|
||
| def test_is_excluded_science_variable(sample_varinfo_test05): | ||
| """Ensure excluded science variables are determined correctly.""" | ||
| assert is_excluded_science_variable( | ||
| sample_varinfo_test05, '/string_time_utc_seconds' | ||
| ) | ||
| assert is_excluded_science_variable( | ||
| sample_varinfo_test05, '/sub_group/string_time_utc_seconds' | ||
| ) | ||
| assert is_excluded_science_variable( | ||
| sample_varinfo_test05, '/sub_group/nested_group/string_time_utc_seconds' | ||
| ) | ||
| assert not is_excluded_science_variable( | ||
| sample_varinfo_test05, '/string_time_seconds' | ||
| ) | ||
| assert not is_excluded_science_variable( | ||
| sample_varinfo_test05, '/sub_group/string_time_seconds' | ||
| ) | ||
| assert not is_excluded_science_variable( | ||
| sample_varinfo_test05, '/sub_group/nested/string_time_seconds' | ||
| ) | ||
|
|
||
|
|
||
| @pytest.mark.parametrize( | ||
| 'group_name', | ||
| [ | ||
| '/', | ||
| '/sub_group', | ||
| '/sub_group/nested_group', | ||
| ], | ||
| ) | ||
| def test_delete_variable(sample_netcdf4_file_test05, group_name): | ||
| """Ensure correct variables are deleted from the datatree.""" | ||
| with xr.open_datatree( | ||
| sample_netcdf4_file_test05, decode_times=False | ||
| ) as test_datatree: | ||
| variable_name = 'string_time_utc_seconds' | ||
| if group_name == '/': | ||
| full_path = f'/{variable_name}' | ||
| else: | ||
| full_path = f'{group_name}/{variable_name}' | ||
|
|
||
| # check that variable is in datatree before deletion | ||
| assert variable_name in test_datatree[group_name].ds.data_vars | ||
| initial_var_count = len(test_datatree[group_name].ds.data_vars) | ||
|
|
||
| delete_variable(test_datatree, full_path) | ||
| assert variable_name not in test_datatree[group_name].ds.data_vars | ||
| assert len(test_datatree[group_name].ds.data_vars) == initial_var_count - 1 | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So this is probably the time to move this to earthdata-varinfo. Because I'll bet Josie has a similar function in her PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I certainly do.