From 1c7bfcfb7a618235164a2bb639f92b786067f573 Mon Sep 17 00:00:00 2001 From: bnb32 Date: Wed, 8 Jan 2025 11:07:06 -0700 Subject: [PATCH 1/5] interpolation adjustment: option to include single levels in interpolation along with isobars. Doing so can sometimes lead to issues if values computed from pressure levels differ from the single level values. This can result in very large alpha values in the linear interpolation routine. --- sup3r/preprocessing/derivers/base.py | 24 +++++++++++++++--------- sup3r/utilities/interpolation.py | 10 +++++----- tests/derivers/test_height_interp.py | 10 +++++++--- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/sup3r/preprocessing/derivers/base.py b/sup3r/preprocessing/derivers/base.py index 9a76705285..7e25ffda1d 100644 --- a/sup3r/preprocessing/derivers/base.py +++ b/sup3r/preprocessing/derivers/base.py @@ -55,17 +55,21 @@ def __init__( that is not found in the :class:`Rasterizer` data it will look for a method to derive the feature in the registry. interp_kwargs : dict | None - Dictionary of kwargs for level interpolation. Can include "method" - and "run_level_check" keys. Method specifies how to perform height - interpolation. e.g. Deriving u_20m from u_10m and u_100m. Options - are "linear" and "log". See + Dictionary of kwargs for level interpolation. Can include "method", + "run_level_check", and "include_single_levels" keys. Method + specifies how to perform height interpolation. e.g. Deriving + u_20m from u_10m and u_100m. Options are "linear" and "log". + ``include_single_levels = True`` will include single level + variables in addition to pressure level variables in the + interpolation. e.g. the 3D array of ``temperature_2m`` along + with the 4D array of ``temperature``. ``See :py:meth:`sup3r.preprocessing.derivers.Deriver.do_level_interpolation` """ # pylint: disable=line-too-long if FeatureRegistry is not None: self.FEATURE_REGISTRY = FeatureRegistry super().__init__(data=data) - self.interp_kwargs = interp_kwargs + self.interp_kwargs = interp_kwargs or {} features = parse_to_list(data=data, features=features) new_features = [f for f in features if f not in self.data] for f in new_features: @@ -269,7 +273,6 @@ def get_single_level_data(self, feature): var_array = da.stack(var_list, axis=-1) sl_shape = (*var_array.shape[:-1], len(lev_list)) lev_array = da.broadcast_to(da.from_array(lev_list), sl_shape) - return var_array, lev_array def get_multi_level_data(self, feature): @@ -296,8 +299,8 @@ def get_multi_level_data(self, feature): assert can_calc_height or have_height, msg if can_calc_height: - lev_array = self.data[['zg', 'topography']].as_array() - lev_array = lev_array[..., 0] - lev_array[..., 1] + lev_array = self.data['zg'] - self.data['topography'] + lev_array = lev_array.data else: lev_array = da.broadcast_to( self.data[Dimension.HEIGHT].astype(np.float32), @@ -321,7 +324,10 @@ def do_level_interpolation( ) -> xr.DataArray: """Interpolate over height or pressure to derive the given feature.""" ml_var, ml_levs = self.get_multi_level_data(feature) - sl_var, sl_levs = self.get_single_level_data(feature) + if interp_kwargs.get('include_single_levels', False): + sl_var, sl_levs = self.get_single_level_data(feature) + else: + sl_var, sl_levs = None, None fstruct = parse_feature(feature) attrs = {} diff --git a/sup3r/utilities/interpolation.py b/sup3r/utilities/interpolation.py index dd0d8c1bbd..0c0903acad 100644 --- a/sup3r/utilities/interpolation.py +++ b/sup3r/utilities/interpolation.py @@ -45,14 +45,14 @@ def get_level_masks(cls, lev_array, level): to the one requested. (lat, lon, time, level) """ - argmin1 = da.argmin(da.abs(lev_array - level), axis=-1, keepdims=True) + lev_diff = np.abs(lev_array - level) + argmin1 = da.argmin(lev_diff, axis=-1, keepdims=True) lev_indices = da.broadcast_to( da.arange(lev_array.shape[-1]), lev_array.shape ) mask1 = lev_indices == argmin1 - - other_levs = da.ma.masked_array(lev_array, mask1) - argmin2 = da.argmin(da.abs(other_levs - level), axis=-1, keepdims=True) + lev_diff = da.where(mask1, np.inf, lev_diff) + argmin2 = da.argmin(lev_diff, axis=-1, keepdims=True) mask2 = lev_indices == argmin2 return mask1, mask2 @@ -61,7 +61,7 @@ def _lin_interp(cls, lev_samps, var_samps, level): """Linearly interpolate between levels.""" diff = da.map_blocks(lambda x, y: x - y, lev_samps[1], lev_samps[0]) alpha = da.where( - diff == 0, + diff < 1e-3, 0, da.map_blocks(lambda x, y: x / y, (level - lev_samps[0]), diff), ) diff --git a/tests/derivers/test_height_interp.py b/tests/derivers/test_height_interp.py index 9464aa49eb..2019fdf859 100644 --- a/tests/derivers/test_height_interp.py +++ b/tests/derivers/test_height_interp.py @@ -156,7 +156,7 @@ def test_single_levels_height_interp_nc(shape=(10, 10), target=(37.25, -107)): transform = Deriver( no_transform.data, derive_features, - interp_kwargs={'method': 'linear'}, + interp_kwargs={'method': 'linear', 'include_single_levels': True}, ) h10 = np.zeros(transform.shape[:3], dtype=np.float32)[..., None] @@ -197,7 +197,11 @@ def test_plevel_height_interp_with_single_lev_data_nc( [wind_file, level_file], target=target, shape=shape ) - transform = Deriver(no_transform.data, derive_features) + transform = Deriver( + no_transform.data, + derive_features, + interp_kwargs={'include_single_levels': True}, + ) hgt_array = ( no_transform['zg'].data - no_transform['topography'].data[..., None] @@ -237,7 +241,7 @@ def test_log_interp(shape=(10, 10), target=(37.25, -107)): transform = Deriver( no_transform.data, derive_features, - interp_kwargs={'method': 'log'}, + interp_kwargs={'method': 'log', 'include_single_levels': True}, ) hgt_array = ( From 0e246a2888da8e553e47bcb93966b0a091a11e2f Mon Sep 17 00:00:00 2001 From: bnb32 Date: Wed, 8 Jan 2025 12:50:16 -0700 Subject: [PATCH 2/5] test fixes --- sup3r/utilities/interpolation.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/sup3r/utilities/interpolation.py b/sup3r/utilities/interpolation.py index 0c0903acad..776361db81 100644 --- a/sup3r/utilities/interpolation.py +++ b/sup3r/utilities/interpolation.py @@ -20,9 +20,6 @@ def get_level_masks(cls, lev_array, level): Parameters ---------- - var_array : Union[np.ndarray, da.core.Array] - Array of variable data, for example u-wind in a 4D array of shape - (lat, lon, time, level) lev_array : Union[np.ndarray, da.core.Array] Height or pressure values for the corresponding entries in var_array, in the same shape as var_array. If this is height and @@ -61,7 +58,7 @@ def _lin_interp(cls, lev_samps, var_samps, level): """Linearly interpolate between levels.""" diff = da.map_blocks(lambda x, y: x - y, lev_samps[1], lev_samps[0]) alpha = da.where( - diff < 1e-3, + diff == 0, 0, da.map_blocks(lambda x, y: x / y, (level - lev_samps[0]), diff), ) @@ -109,9 +106,6 @@ def interp_to_level( Parameters ---------- - var_array : xr.DataArray - Array of variable data, for example u-wind in a 4D array of shape - (lat, lon, time, level) lev_array : xr.DataArray Height or pressure values for the corresponding entries in var_array, in the same shape as var_array. If this is height and @@ -119,6 +113,9 @@ def interp_to_level( should be the geopotential height corresponding to every var_array index relative to the surface elevation (subtract the elevation at the surface from the geopotential height) + var_array : xr.DataArray + Array of variable data, for example u-wind in a 4D array of shape + (lat, lon, time, level) level : float level or levels to interpolate to (e.g. final desired hub height above surface elevation) From fd9055ca32ff710a64dd7fe80785e70a86228907 Mon Sep 17 00:00:00 2001 From: bnb32 Date: Wed, 8 Jan 2025 14:01:25 -0700 Subject: [PATCH 3/5] test fixes --- sup3r/utilities/interpolation.py | 2 +- tests/data_handlers/test_dh_nc_cc.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sup3r/utilities/interpolation.py b/sup3r/utilities/interpolation.py index 776361db81..79f0935182 100644 --- a/sup3r/utilities/interpolation.py +++ b/sup3r/utilities/interpolation.py @@ -48,7 +48,7 @@ def get_level_masks(cls, lev_array, level): da.arange(lev_array.shape[-1]), lev_array.shape ) mask1 = lev_indices == argmin1 - lev_diff = da.where(mask1, np.inf, lev_diff) + lev_diff = da.abs(da.ma.masked_array(lev_array, mask1) - level) argmin2 = da.argmin(lev_diff, axis=-1, keepdims=True) mask2 = lev_indices == argmin2 return mask1, mask2 diff --git a/tests/data_handlers/test_dh_nc_cc.py b/tests/data_handlers/test_dh_nc_cc.py index 9dd8680234..b952ab83c4 100644 --- a/tests/data_handlers/test_dh_nc_cc.py +++ b/tests/data_handlers/test_dh_nc_cc.py @@ -69,6 +69,7 @@ def test_reload_cache(): target=target, shape=(20, 20), cache_kwargs=cache_kwargs, + interp_kwargs={'include_single_levels': True} ) # reload from cache @@ -80,7 +81,9 @@ def test_reload_cache(): cache_kwargs=cache_kwargs, ) assert all(f in cached for f in features) - assert np.array_equal(handler.as_array(), cached.as_array()) + harr = handler.as_array().compute() + carr = cached.as_array().compute() + assert np.array_equal(harr, carr) @pytest.mark.parametrize( From efcacfa59cc4449c21334a287904341f53451b14 Mon Sep 17 00:00:00 2001 From: bnb32 Date: Wed, 8 Jan 2025 21:24:39 -0700 Subject: [PATCH 4/5] test fix --- tests/forward_pass/test_forward_pass.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/forward_pass/test_forward_pass.py b/tests/forward_pass/test_forward_pass.py index 69d6cfda7d..76d2687626 100644 --- a/tests/forward_pass/test_forward_pass.py +++ b/tests/forward_pass/test_forward_pass.py @@ -491,9 +491,7 @@ def test_fwp_chunking(input_files): data_chunked[hr_slice][..., t_hr_slice, :] = out err = data_chunked - data_nochunk - err /= data_nochunk - - assert np.mean(np.abs(err.flatten())) < 0.01 + assert np.mean(np.abs(err)) < 1e-6 def test_fwp_nochunking(input_files): From c03ad1485464b563d869429eb1124367fe40113e Mon Sep 17 00:00:00 2001 From: bnb32 Date: Sat, 11 Jan 2025 10:19:12 -0700 Subject: [PATCH 5/5] Aded ``load_features`` argument to ``DataHandler`` so this can be used to control which features are used in derivations, instead of using ``interp_kwargs``. Removed ``include_single_levels`` parsing from ``interp_kwargs`` --- sup3r/preprocessing/data_handlers/factory.py | 17 +++++++-- sup3r/preprocessing/derivers/base.py | 17 +++------ tests/data_handlers/test_dh_nc_cc.py | 3 +- tests/derivers/test_height_interp.py | 40 ++++++++++++++++++-- 4 files changed, 56 insertions(+), 21 deletions(-) diff --git a/sup3r/preprocessing/data_handlers/factory.py b/sup3r/preprocessing/data_handlers/factory.py index 1115ec792a..1eec3c46ce 100644 --- a/sup3r/preprocessing/data_handlers/factory.py +++ b/sup3r/preprocessing/data_handlers/factory.py @@ -47,6 +47,7 @@ def __init__( self, file_paths, features='all', + load_features='all', res_kwargs: Optional[dict] = None, chunks: Union[str, Dict[str, int]] = 'auto', target: Optional[tuple] = None, @@ -69,9 +70,16 @@ def __init__( file_paths : str | list | pathlib.Path file_paths input to LoaderClass features : list | str - Features to load and / or derive. If 'all' then all available raw - features will be loaded. Specify explicit feature names for - derivations. + Features to derive. If 'all' then all available raw features will + just be loaded. Specify explicit feature names for derivations. + load_features : list | str + Features to load and make available for derivations. If 'all' then + all available raw features will be loaded and made available for + derivations. This can be used to restrict features used for + derivations. For example, to derive 'temperature_100m' from only + temperature isobars, from data that includes single level values as + well (like temperature_2m), don't include 'temperature_2m' in the + ``load_features`` list. res_kwargs : dict Additional keyword arguments passed through to the ``BaseLoader``. BaseLoader is usually xr.open_mfdataset for NETCDF files and @@ -146,12 +154,13 @@ def __init__( ) just_coords = not features - raster_feats = 'all' if any(missing_features) else [] + raster_feats = load_features if any(missing_features) else [] self.rasterizer = self.loader = self.cache = None if any(cached_features): self.cache = Loader( file_paths=cached_files, + features=load_features, res_kwargs=res_kwargs, chunks=chunks, BaseLoader=BaseLoader, diff --git a/sup3r/preprocessing/derivers/base.py b/sup3r/preprocessing/derivers/base.py index 7e25ffda1d..d431d02c51 100644 --- a/sup3r/preprocessing/derivers/base.py +++ b/sup3r/preprocessing/derivers/base.py @@ -55,14 +55,10 @@ def __init__( that is not found in the :class:`Rasterizer` data it will look for a method to derive the feature in the registry. interp_kwargs : dict | None - Dictionary of kwargs for level interpolation. Can include "method", - "run_level_check", and "include_single_levels" keys. Method - specifies how to perform height interpolation. e.g. Deriving - u_20m from u_10m and u_100m. Options are "linear" and "log". - ``include_single_levels = True`` will include single level - variables in addition to pressure level variables in the - interpolation. e.g. the 3D array of ``temperature_2m`` along - with the 4D array of ``temperature``. ``See + Dictionary of kwargs for level interpolation. Can include "method" + and "run_level_check". "method" specifies how to perform height + interpolation. e.g. Deriving u_20m from u_10m and u_100m. Options + are "linear" and "log". See :py:meth:`sup3r.preprocessing.derivers.Deriver.do_level_interpolation` """ # pylint: disable=line-too-long if FeatureRegistry is not None: @@ -324,10 +320,7 @@ def do_level_interpolation( ) -> xr.DataArray: """Interpolate over height or pressure to derive the given feature.""" ml_var, ml_levs = self.get_multi_level_data(feature) - if interp_kwargs.get('include_single_levels', False): - sl_var, sl_levs = self.get_single_level_data(feature) - else: - sl_var, sl_levs = None, None + sl_var, sl_levs = self.get_single_level_data(feature) fstruct = parse_feature(feature) attrs = {} diff --git a/tests/data_handlers/test_dh_nc_cc.py b/tests/data_handlers/test_dh_nc_cc.py index b952ab83c4..740b6f7605 100644 --- a/tests/data_handlers/test_dh_nc_cc.py +++ b/tests/data_handlers/test_dh_nc_cc.py @@ -68,8 +68,7 @@ def test_reload_cache(): features=features, target=target, shape=(20, 20), - cache_kwargs=cache_kwargs, - interp_kwargs={'include_single_levels': True} + cache_kwargs=cache_kwargs ) # reload from cache diff --git a/tests/derivers/test_height_interp.py b/tests/derivers/test_height_interp.py index 2019fdf859..a747f7b254 100644 --- a/tests/derivers/test_height_interp.py +++ b/tests/derivers/test_height_interp.py @@ -114,6 +114,41 @@ def test_plevel_height_interp_nc_with_cache(): ) +def test_plevel_height_interp_with_filtered_load_features(): + """Test that filtering load features can be used to control the features + used in the derivations.""" + + with TemporaryDirectory() as td: + orog_file = os.path.join(td, 'orog.nc') + make_fake_nc_file(orog_file, shape=(10, 10, 20), features=['orog']) + sfc_file = os.path.join(td, 'u_10m.nc') + make_fake_nc_file(sfc_file, shape=(10, 10, 20), features=['u_10m']) + level_file = os.path.join(td, 'wind_levs.nc') + make_fake_nc_file( + level_file, shape=(10, 10, 20, 3), features=['zg', 'u'] + ) + derive_features = ['u_20m'] + dh_filt = DataHandler( + [orog_file, sfc_file, level_file], + features=derive_features, + load_features=['topography', 'zg', 'u'], + ) + dh_no_filt = DataHandler( + [orog_file, sfc_file, level_file], + features=derive_features, + ) + dh = DataHandler( + [orog_file, level_file], + features=derive_features, + ) + assert np.array_equal( + dh_filt.data['u_20m'].data, dh.data['u_20m'].data + ) + assert not np.array_equal( + dh_filt.data['u_20m'].data, dh_no_filt.data['u_20m'].data + ) + + def test_only_interp_method(): """Test that interp method alone returns the right values""" hgt = np.zeros((10, 10, 5, 3)) @@ -156,7 +191,7 @@ def test_single_levels_height_interp_nc(shape=(10, 10), target=(37.25, -107)): transform = Deriver( no_transform.data, derive_features, - interp_kwargs={'method': 'linear', 'include_single_levels': True}, + interp_kwargs={'method': 'linear'}, ) h10 = np.zeros(transform.shape[:3], dtype=np.float32)[..., None] @@ -200,7 +235,6 @@ def test_plevel_height_interp_with_single_lev_data_nc( transform = Deriver( no_transform.data, derive_features, - interp_kwargs={'include_single_levels': True}, ) hgt_array = ( @@ -241,7 +275,7 @@ def test_log_interp(shape=(10, 10), target=(37.25, -107)): transform = Deriver( no_transform.data, derive_features, - interp_kwargs={'method': 'log', 'include_single_levels': True}, + interp_kwargs={'method': 'log'}, ) hgt_array = (