MeteoSwiss
diff --git a/‎pyproject.toml‎
Lines changed: 24 additions & 7 deletions b/‎pyproject.toml‎
Lines changed: 24 additions & 7 deletions
diff --git a/‎src/weathergen/__init__.py‎
Lines changed: 3 additions & 4 deletions b/‎src/weathergen/__init__.py‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎src/weathergen/datasets/anemoi_dataset.py‎
Lines changed: 0 additions & 2 deletions b/‎src/weathergen/datasets/anemoi_dataset.py‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎src/weathergen/datasets/batchifyer.py‎
Lines changed: 7 additions & 17 deletions b/‎src/weathergen/datasets/batchifyer.py‎
Lines changed: 7 additions & 17 deletions
diff --git a/‎src/weathergen/datasets/multi_stream_data_sampler.py‎
Lines changed: 22 additions & 27 deletions b/‎src/weathergen/datasets/multi_stream_data_sampler.py‎
Lines changed: 22 additions & 27 deletions
diff --git a/‎src/weathergen/datasets/normalizer.py‎
Lines changed: 2 additions & 2 deletions b/‎src/weathergen/datasets/normalizer.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/weathergen/datasets/obs_dataset.py‎
Lines changed: 6 additions & 4 deletions b/‎src/weathergen/datasets/obs_dataset.py‎
Lines changed: 6 additions & 4 deletions
@@ -54,18 +54,35 @@ line-length = 100
 # All disabled until the code is formatted.
 select = [
     # pycodestyle
-    # "E",
+    "E",
     # Pyflakes
-    # "F",
+    "F",
     # pyupgrade
-    # "UP",
+    "UP",
     # flake8-bugbear
-    # "B",
+    "B",
     # flake8-simplify
-    # "SIM",
+    "SIM",
     # isort
-    # "I",
+    "I",
 ]
-ignore = [
 
+# These rules are sensible and should be enabled at a later stage.
+ignore = [
+  "E501",
+  "E721",
+  "E722",
+  "B006",
+  "B011",
+  "UP008",
+  "SIM115",
+  "SIM117",
+  "SIM118",
+  "SIM102",
+  "SIM210",
+  "SIM212",
+  "SIM401",
+  "F811",
+  # Ignored on purpose (ambiguous variable name), not relevant for us
+  "E741",
 ]
@@ -7,14 +7,13 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 
-import time
-import sys
 import pdb
+import sys
+import time
 import traceback
 
-from weathergen.utils.config import Config
 from weathergen.train.trainer import Trainer
-from weathergen.train.utils import get_run_id
+from weathergen.utils.config import Config
 
 
 ####################################################################################################
 
@@ -7,11 +7,9 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 
-import code
 import datetime
 
 import numpy as np
-
 from anemoi.datasets import open_dataset
 
 
 
@@ -7,29 +7,20 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 
-import torch
-import numpy as np
-import code
 import warnings
-import time
+from functools import partial
 
 import astropy_healpix as hp
-from astropy_healpix.healpy import ang2pix, pix2ang
-
-from functools import partial
+import numpy as np
+import torch
+from astropy_healpix.healpy import ang2pix
 
 from weathergen.datasets.utils import (
-    vecs_to_rots,
-    s2tor3,
-    r3tos2,
-    locs_to_cell_coords,
-    coords_to_hpyidxs,
-    healpix_verts,
-    get_target_coords_local,
-    get_target_coords_local_fast,
     get_target_coords_local_ffast,
     healpix_verts_rots,
     locs_to_cell_coords_ctrs,
+    r3tos2,
+    s2tor3,
 )
 
 
@@ -64,7 +55,6 @@ def tokenize_window_space(
     )
     hpy_idxs_ord_split = np.split(hpy_idxs_ord, splits + 1)
 
-    lens = []
     for i, c in enumerate(cells_idxs):
         thetas_sorted = torch.argsort(thetas[hpy_idxs_ord_split[i]], stable=True)
         posr3_cell = posr3[hpy_idxs_ord_split[i]][thetas_sorted]
@@ -110,7 +100,7 @@ def tokenize_window_spacetime(
     mr,
 ):
     t_unique = np.unique(times)
-    for i, t in enumerate(t_unique):
+    for _, t in enumerate(t_unique):
         mask = t == times
         tokens_cells = tokenize_window_space(
             source[mask],
 
@@ -7,25 +7,18 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 
-import numpy as np
-import torch
-import math
 import datetime
-from copy import deepcopy
 import logging
-import time
-import code
-import os
-import yaml
 
+import numpy as np
 import pandas as pd
+import torch
 
-from weathergen.datasets.obs_dataset import ObsDataset
 from weathergen.datasets.anemoi_dataset import AnemoiDataset
-from weathergen.datasets.normalizer import DataNormalizer
 from weathergen.datasets.batchifyer import Batchifyer
+from weathergen.datasets.normalizer import DataNormalizer
+from weathergen.datasets.obs_dataset import ObsDataset
 from weathergen.datasets.utils import merge_cells
-
 from weathergen.utils.logger import logger
 
 
@@ -69,7 +62,7 @@ def __init__(
         self.len_hrs = len_hrs
         self.step_hrs = step_hrs
 
-        fc_policy_seq = "sequential" == forecast_policy or "sequential_random" == forecast_policy
+        fc_policy_seq = forecast_policy == "sequential" or forecast_policy == "sequential_random"
         assert forecast_steps >= 0 if not fc_policy_seq else True
         self.forecast_delta_hrs = forecast_delta_hrs if forecast_delta_hrs > 0 else self.len_hrs
         self.forecast_steps = np.array(
@@ -111,7 +104,7 @@ def __init__(
                     # the processing here is not natural but a workaround to various inconsistencies in the
                     # current datasets
                     data_idxs = [
-                        i for i, cn in enumerate(ds.selected_colnames[do:]) if "obsvalue_" == cn[:9]
+                        i for i, cn in enumerate(ds.selected_colnames[do:]) if cn[:9] == "obsvalue_"
                     ]
                     mask = np.ones(len(ds.selected_colnames[do:]), dtype=np.int32).astype(bool)
                     mask[data_idxs] = False
@@ -272,7 +265,7 @@ def __iter__(self):
         # idx_raw is used to index into the dataset; the decoupling is needed
         # since there are empty batches
         idx_raw = iter_start
-        for i, bidx in enumerate(range(iter_start, iter_end, self.batch_size)):
+        for i, _bidx in enumerate(range(iter_start, iter_end, self.batch_size)):
             # targets, targets_coords, targets_idxs = [], [], [],
             tcs, tcs_lens, target_tokens, source_tokens_cells, source_tokens_lens = (
                 [],
@@ -314,7 +307,7 @@ def __iter__(self):
                 c_source_raw = []
 
                 for obs_id, (stream_info, stream_dsn, stream_idxs) in enumerate(
-                    zip(self.streams, self.obs_datasets_norm, self.obs_datasets_idxs)
+                    zip(self.streams, self.obs_datasets_norm, self.obs_datasets_idxs, strict=False)
                 ):
                     s_tcs = []
                     s_tcs_lens = []
@@ -326,17 +319,17 @@ def __iter__(self):
                     s_source_raw = []
 
                     token_size = stream_info["token_size"]
-                    grid = (
-                        stream_info["gridded_output"] if "gridded_output" in stream_info else None
-                    )
-                    grid_info = (
-                        stream_info["gridded_output_info"]
-                        if "gridded_output_info" in stream_info
-                        else None
-                    )
+                    # grid = (
+                    #     stream_info["gridded_output"] if "gridded_output" in stream_info else None
+                    # )
+                    # grid_info = (
+                    #     stream_info["gridded_output_info"]
+                    #     if "gridded_output_info" in stream_info
+                    #     else None
+                    # )
 
                     for i_source, ((ds, normalizer, do), s_idxs) in enumerate(
-                        zip(stream_dsn, stream_idxs)
+                        zip(stream_dsn, stream_idxs, strict=False)
                     ):
                         # source window (of potentially multi-step length)
                         (source1, times1) = ds[idx]
@@ -417,7 +410,7 @@ def __iter__(self):
                     for fstep in range(forecast_dt + 1):
                         # collect all streams
                         for i_source, ((ds, normalizer, do), s_idxs) in enumerate(
-                            zip(stream_dsn, stream_idxs)
+                            zip(stream_dsn, stream_idxs, strict=False)
                         ):
                             (source2, times2) = ds[idx + step_forecast_dt]
 
@@ -534,15 +527,17 @@ def __iter__(self):
                     idxs = torch.cat(
                         [
                             torch.arange(o, o + l, dtype=torch.int64)
-                            for o, l in zip(offsets, source_tokens_lens[ib, itype])
+                            for o, l in zip(offsets, source_tokens_lens[ib, itype], strict=False)
                         ]
                     )
                     idxs_embed[-1] += [idxs.unsqueeze(1)]
                     idxs_embed_pe[-1] += [
                         torch.cat(
                             [
                                 torch.arange(o, o + l, dtype=torch.int32)
-                                for o, l in zip(offsets_pe, source_tokens_lens[ib][itype])
+                                for o, l in zip(
+                                    offsets_pe, source_tokens_lens[ib][itype], strict=False
+                                )
                             ]
                         )
                     ]
 
@@ -69,11 +69,11 @@ def normalize_coords(self, data, normalize_latlon=True):
 
         go = self.geoinfo_offset
         for i, ch in enumerate(self.geoinfo_idx):
-            if 0 == i:  # lats
+            if i == 0:  # lats
                 if normalize_latlon:
                     data[..., go + i] = np.sin(np.deg2rad(data[..., go + i]))
                 pass
-            elif 1 == i:  # lons
+            elif i == 1:  # lons
                 if normalize_latlon:
                     data[..., go + i] = np.sin(0.5 * np.deg2rad(data[..., go + i]))
             else:
 
@@ -7,11 +7,11 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 
+import code
 import datetime
 
 import numpy as np
 import zarr
-import code
 
 
 class ObsDataset:
@@ -37,12 +37,14 @@ def __init__(
 
         # self.selected_colnames = self.colnames
         # self.selected_cols_idx = np.arange(len(self.colnames))
+        idx = 0
         for i, col in enumerate(reversed(self.colnames)):
+            idx = i
             # if col[:9] == 'obsvalue_' :
             if not (col[:4] == "sin_" or col[:4] == "cos_"):
                 break
-        self.selected_colnames = self.colnames[: len(self.colnames) - i]
-        self.selected_cols_idx = np.arange(len(self.colnames))[: len(self.colnames) - i]
+        self.selected_colnames = self.colnames[: len(self.colnames) - idx]
+        self.selected_cols_idx = np.arange(len(self.colnames))[: len(self.colnames) - idx]
 
         # Create index for samples
         self._setup_sample_index(start, end, self.len_hrs, self.step_hrs)
@@ -190,7 +192,7 @@ def _load_properties(self) -> None:
 
 ####################################################################################################
 if __name__ == "__main__":
-    zarrpath = config.zarrpath
+    # zarrpath = config.zarrpath
     zarrpath = "/lus/h2resw01/fws4/lb/project/ai-ml/observations/zarr/v0.2"
 
     # # polar orbiting satellites