
Release 2.66 #147


Merged (3 commits, Feb 6, 2025)
56 changes: 32 additions & 24 deletions .github/workflows/github-pages-deployment.yml
@@ -1,29 +1,37 @@
-name: GitHub Pages Deployment
+name: Deployment to GitHub Pages
 on:
-  push:
-    branches:
-      main*
+  push:
+    branches:
+      - main*
 permissions:
-  id-token: write
-  pages: write
+  id-token: write
+  pages: write
 jobs:
-  # ---------------------------------------------------------------- #
-  # |          Building and deployment of Sphinx build             | #
-  # ---------------------------------------------------------------- #
-  docs:
+  # Build the Sphinx documentation
+  build:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v3
-      - name: Install Dependencies
-        run: |
-          pip install sphinx piccolo-theme myst_parser
-      - name: Sphinx Build
-        run: |
-          sphinx-build ./sphinx-docs/ _build
-      - name: Upload GitHub Pages artifact
-        uses: actions/upload-pages-artifact@v3
-        with:
-          path: _build
-      - name: Push artifact to pages
-        uses: actions/deploy-pages@v3.0.1
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+      - name: Install Dependencies
+        run: |
+          pip install sphinx piccolo-theme myst_parser
+      - name: Sphinx Build
+        run: |
+          sphinx-build ./sphinx-docs/ build_outputs_folder
+      - name: Upload GitHub Pages artifact
+        uses: actions/upload-pages-artifact@v3
+        id: deployment
+        with:
+          path: build_outputs_folder/
+  # Deploy the Sphinx documentation to GitHub Pages
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
4 changes: 4 additions & 0 deletions README.md
@@ -10,6 +10,10 @@ SPDX-License-Identifier: MPL-2.0
 [![Bugs](https://sonarcloud.io/api/project_badges/measure?project=alliander-opensource_weather-provider-api&metric=bugs)](https://sonarcloud.io/dashboard?id=alliander-opensource_Weather-Provider-API)
 <!--[![Coverage](https://sonarcloud.io/api/project_badges/measure?project=alliander-opensource_weather-provider-api&metric=coverage)](https://sonarcloud.io/dashboard?id=alliander-opensource_Weather-Provider-API)-->
 
+> :warning: **Due to changes in the way that CDS delivers data, ERA5 data does not currently process sea level
+> data. This will be fixed in a future release.**
+
+
 # Weather Provider Library and API
 
 This API is intended to help you fetch weather data from different data sources in an efficient and uniform way.
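For context, a request against a running instance of the API looks roughly like the sketch below. The base URL, endpoint path, and parameter names are illustrative assumptions, not something this PR defines.

```python
# Hypothetical call against a locally running instance of the API.
# Endpoint path and parameter names are illustrative assumptions.
import requests

BASE_URL = "http://localhost:8080"  # assumed local deployment

response = requests.get(
    f"{BASE_URL}/weather",
    params={
        "source": "cds",
        "model": "era5land",
        "begin": "2024-01-01T00:00",
        "end": "2024-01-02T00:00",
        "lat": 52.10,
        "lon": 5.18,
        "response_format": "json",
    },
    timeout=60,
)
response.raise_for_status()
print(response.json())
```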
245 changes: 24 additions & 221 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "weather_provider_api"
-version = "2.63.0"
+version = "2.66.0"
 description = "Weather Provider Libraries and API"
 authors = ["Verbindingsteam", "Raoul Linnenbank <58594297+rflinnenbank@users.noreply.github.com>"]
 license = "MPL-2.0"
8 changes: 4 additions & 4 deletions weather_provider_api/routers/weather/controller.py
@@ -40,10 +40,10 @@ def get_weather(
     source_id: str,
     model_id: str,
     fetch_async: bool,
-    coords: List[List[Tuple[float, float]]],
-    begin: Optional[datetime.datetime] = None,
-    end: Optional[datetime.datetime] = None,
-    factors: List[str] = None,
+    coords: list[list[tuple[float, float]]],
+    begin: datetime.datetime | None = None,
+    end: datetime.datetime | None = None,
+    factors: list[str] | None = None,
 ):
     """Function to use the requested weather model from the requested source to get specific weather factors for a
     specific time and specific location(s)
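A quick sketch of a call under the modernized signature. The import path and the source/model IDs are assumptions; the hunk does not show how get_weather is exposed.

```python
# Sketch of calling get_weather() with the new built-in generics and `| None` hints.
# The import below and the source/model IDs are assumptions for illustration.
import datetime

from weather_provider_api.routers.weather.controller import get_weather  # assumed export

dataset = get_weather(
    source_id="cds",           # illustrative source
    model_id="era5land",       # illustrative model
    fetch_async=False,
    coords=[[(52.10, 5.18)]],  # one location: a list of lists of (lat, lon) tuples
    begin=datetime.datetime(2024, 1, 1),
    end=datetime.datetime(2024, 1, 2),
    factors=None,              # now explicitly optional per `list[str] | None`
)
```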
weather_provider_api/routers/weather/repository/repository.py
@@ -110,7 +110,7 @@ def cleanup(self):
         self._delete_excess_files()
 
     @abstractmethod
-    def update(self):
+    def update(self, test_mode: bool = False) -> RepositoryUpdateResult:
         raise NotImplementedError(NOT_IMPLEMENTED_ERROR)
 
     def gather_period(self, begin: datetime, end: datetime, coordinates: List[GeoPosition]) -> xr.Dataset:
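Concrete repositories must now accept the test_mode flag. Below is a minimal sketch of the new contract; the subclass and the `completed` member name are assumptions, since only `failure` appears in this diff.

```python
# Minimal sketch of the new abstract contract. A real subclass of
# WeatherRepositoryBase overrides more than update(); this only shows
# the test_mode signature introduced here.
from weather_provider_api.routers.weather.repository.repository import (
    RepositoryUpdateResult,
    WeatherRepositoryBase,
)


class DemoRepository(WeatherRepositoryBase):  # hypothetical subclass
    def update(self, test_mode: bool = False) -> RepositoryUpdateResult:
        # A real implementation would fetch only the first day of each
        # month when test_mode is True; here we simply report success.
        return RepositoryUpdateResult.completed  # member name assumed
```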
weather_provider_api/routers/weather/sources/cds/client/cds_api_tools.py
@@ -80,11 +80,11 @@ class CDSRequest(BaseModel):
             "21:00",
             "22:00",
             "23:00",
-        ]
+        ],
     )
     data_format: str = "netcdf"
     download_format: str = "zip"
-    area: tuple[float, float, float, float] = Field((7.22, 50.75, 3.2, 53.7))
+    area: tuple[float, float, float, float] = (53.7, 3.2, 50.75, 7.22)
 
     @property
     def request_parameters(self) -> dict[str, str | list[str] | tuple[float]]:
@@ -100,6 +100,7 @@ def request_parameters(self) -> dict[str, str | list[str] | tuple[float]]:
             "area": self.area,
             "data_format": self.data_format,
             "download_format": self.download_format,
+            "grid_resolution": (0.25, 0.25),
         }
 
 
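The reordered default matters because the CDS API reads `area` as (north, west, south, east), so the new default describes a bounding box around the Netherlands. A sketch of inspecting the updated defaults, assuming CDSRequest's remaining fields can all default:

```python
# Sketch: inspect the updated CDSRequest defaults.
# Assumes all other CDSRequest fields carry defaults so it can be
# constructed bare; the import path matches the diff in this PR.
from weather_provider_api.routers.weather.sources.cds.client.cds_api_tools import CDSRequest

request = CDSRequest()

# CDS reads `area` as (north, west, south, east); the new default
# (53.7, 3.2, 50.75, 7.22) bounds the Netherlands.
print(request.area)

# The request payload now pins the grid spacing explicitly.
print(request.request_parameters["grid_resolution"])  # (0.25, 0.25)
```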
weather_provider_api/routers/weather/sources/cds/client/era5_utils.py
@@ -41,7 +41,7 @@ class Era5UpdateSettings(BaseModel):
     maximum_runtime_in_minutes: int = 2 * 60  # 2 hours
 
 
-def era5_repository_update(update_settings: Era5UpdateSettings) -> RepositoryUpdateResult:
+def era5_repository_update(update_settings: Era5UpdateSettings, test_mode: bool) -> RepositoryUpdateResult:
     """A function to update a variant of ERA5 data into the repository."""
     starting_moment_of_update = datetime.now(UTC)
     cutoff_time = starting_moment_of_update + relativedelta(minutes=update_settings.maximum_runtime_in_minutes)
@@ -54,7 +54,7 @@ def era5_repository_update(update_settings: Era5UpdateSettings) -> RepositoryUpdateResult:
     logger.info(f" - Maximum runtime: {update_settings.maximum_runtime_in_minutes} minutes ({cutoff_time})")
 
     try:
-        _era5_update_month_by_month(update_settings, starting_moment_of_update, cutoff_time)
+        _era5_update_month_by_month(update_settings, starting_moment_of_update, cutoff_time, test_mode)
     except Exception as e:
         logger.error(f"Failed to update ERA5 data. Reason: {e}")
         return RepositoryUpdateResult.failure
@@ -67,7 +67,7 @@ def era5_repository_update(update_settings: Era5UpdateSettings) -> RepositoryUpdateResult:
 
 
 def _era5_update_month_by_month(
-    update_settings: Era5UpdateSettings, starting_moment_of_update: datetime, cutoff_time: datetime
+    update_settings: Era5UpdateSettings, starting_moment_of_update: datetime, cutoff_time: datetime, test_mode: bool
 ):
     """A function to update a variant of ERA5 data into the repository."""
     amount_of_months_processed = amount_of_months_not_processable = 0
@@ -90,7 +90,7 @@ def _era5_update_month_by_month(
             logger.warning("Maximum runtime reached. Stopping update.")
             break
 
-        update_result = _era5_update_month(update_settings, update_month)
+        update_result = _era5_update_month(update_settings, update_month, test_mode)
         if update_result == RepositoryUpdateResult.failure:
             amount_of_months_not_processable += 1
         amount_of_months_processed += 1
@@ -111,7 +111,9 @@ def _era5_update_month_by_month(
     logger.info(f"Average time per month: {average_time_per_month_in_minutes} minutes")
 
 
-def _era5_update_month(update_settings: Era5UpdateSettings, update_month: datetime) -> RepositoryUpdateResult:
+def _era5_update_month(
+    update_settings: Era5UpdateSettings, update_month: datetime, test_mode: bool
+) -> RepositoryUpdateResult:
     """A function to update a variant of ERA5 data into the repository."""
     logger.debug(f" > Processing month: {update_month.year}-{update_month.month}")
 
@@ -123,6 +125,9 @@ def _era5_update_month(update_settings: Era5UpdateSettings, update_month: datetime) -> RepositoryUpdateResult:
         logger.debug(f" > File {month_file} requires update.")
         month_file_name = month_file.with_suffix(Era5FileSuffixes.UNFORMATTED)
 
+        # Only the first day of each month in test mode, otherwise all days:
+        day = [str(i) for i in list(range(1, 32))] if not test_mode else ["1"]
+
     try:
         download_era5_data(
             update_settings.era5_dataset_to_update_from,
@@ -131,9 +136,8 @@ def _era5_update_month(update_settings: Era5UpdateSettings, update_month: datetime) -> RepositoryUpdateResult:
             variables=update_settings.factors_to_process,
             year=[str(update_month.year)],
             month=[str(update_month.month)],
-            day=[str(i) for i in list(range(1, 32))],
+            day=day,
             time=[f"{hour:02d}:00" for hour in range(24)],
-            area=(53.510403, 3.314971, 50.803721, 7.092053),
             ),
             target_location=str(month_file_name),
         )
@@ -194,7 +198,6 @@ def _verify_first_day_available_for_era5(update_moment: datetime, update_settings):
             month=[str(update_moment.month)],
             day=[str(update_moment.day)],
             time=[f"{hour:02d}:00" for hour in range(2)],
-            area=(53.510403, 3.314971, 50.803721, 7.092053),  # The Netherlands area
             ),
             target_location=tempfile.NamedTemporaryFile().name,
         )
@@ -230,7 +233,6 @@ def _finalize_formatted_file(file_path: Path, current_moment: date, verification_date: date):
             logger.error(f" > Failed to remove temporary file {file_path.with_suffix(file_suffix)}: {e}")
 
     # Rename the file to its proper name:
-    print("RENAMING FILE", current_moment, verification_date, permanent_month, incomplete_month)
     if current_moment == verification_date.replace(day=1):
         # Current month means an incomplete file
         file_path.with_suffix(Era5FileSuffixes.FORMATTED).rename(file_path.with_suffix(Era5FileSuffixes.INCOMPLETE))
@@ -271,7 +273,8 @@ def file_requires_update(file_path: Path, current_month: date, verification_date):
         return True  # An update should both clean the UNFORMATTED file and generate a proper one
 
     if not file_path.with_suffix(".nc").exists() or file_path.with_suffix(Era5FileSuffixes.INCOMPLETE).exists():
-        logger.debug(" > No file exists, or it is still incomplete: UPDATE REQUIRED ")
+        logger.debug(" > No file exists, or it is still incomplete: UPDATE REQUIRED")
+        print("File path: ", file_path)
         return True  # No file matching the mask or incomplete files always mean the update is required!
 
     files_in_folder = glob.glob(f"{file_path}*.nc")
@@ -343,15 +346,34 @@ def _recombine_multiple_files(unformatted_file: Path) -> None:
     with zipfile.ZipFile(unformatted_file, "r") as zip_ref:
         zip_ref.extractall(temp_dir)
 
-    # Load the data
-
-    data_stream_land_accum = xr.open_dataset(Path(temp_dir).joinpath("data_stream-oper_stepType-accum.nc"))
-    data_stream_land_instant = xr.open_dataset(Path(temp_dir).joinpath("data_stream-oper_stepType-instant.nc"))
-    data_stream_wave_instant = xr.open_dataset(Path(temp_dir).joinpath("data_stream-wave_stepType-instant.nc"))
+    files_to_load_in_order = [
+        "data_stream-oper_stepType-instant",
+        "data_stream-oper_stepType-accum",
+        # TODO: Add the following file back in when we can properly handle it
+        # "data_stream-wave_stepType-instant",  # Something about this data doesn't mesh well anymore with the rest...
+    ]
+
+    # TODO: Load, convert to dataframe, merge, convert back to xarray
+    concatenated_dataset = xr.Dataset()
+    for filename in files_to_load_in_order:
+        file_path = Path(temp_dir).joinpath(f"{filename}.nc")
+        if not file_path.exists():
+            logger.error(f" > Required file {filename}.nc does not exist. Aborting recombination.")
+            raise FileNotFoundError(f" > Required file {filename}.nc does not exist. Aborting recombination.")
+
+        dataset = xr.open_dataset(file_path)
+        dataset = dataset.drop("expver", errors="raise")
+
+        if not concatenated_dataset.data_vars:
+            concatenated_dataset = dataset.copy(deep=True)
+        else:
+            concatenated_dataset = xr.merge(
+                [concatenated_dataset, dataset], join="outer", compat="no_conflicts", combine_attrs="override"
            )
 
-    # Merge the data
-    combined_data = xr.merge([data_stream_land_accum, data_stream_land_instant, data_stream_wave_instant])
-    combined_data.to_netcdf(unformatted_file, format="NETCDF4", engine="netcdf4")
+    concatenated_dataset.to_netcdf(unformatted_file, format="NETCDF4", engine="netcdf4")
+    # raise ValueError("This is not working yet")
 
 
 def download_era5_data(
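For readers unfamiliar with the new recombination loop, here is a standalone sketch of the same xarray merge pattern. The file names are hypothetical, and the inputs are assumed to share time/lat/lon coordinates.

```python
# Sketch of merging several NetCDF streams into one dataset with xarray.
# File names are hypothetical; assumes the inputs share coordinates.
from pathlib import Path

import xarray as xr

streams = ["stream_a.nc", "stream_b.nc"]

merged = xr.Dataset()
for name in streams:
    ds = xr.open_dataset(Path("/tmp/era5") / name)
    ds = ds.drop_vars("expver", errors="ignore")  # drop bookkeeping variables before merging
    if not merged.data_vars:
        merged = ds.copy(deep=True)
    else:
        # "outer" join keeps the union of coordinates; "no_conflicts" fails fast
        # if the same variable carries different values in two files.
        merged = xr.merge([merged, ds], join="outer", compat="no_conflicts", combine_attrs="override")

merged.to_netcdf("/tmp/era5/combined.nc", format="NETCDF4", engine="netcdf4")
```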
@@ -7,6 +7,7 @@
 from pytz import UTC
 
 from weather_provider_api.routers.weather.repository.repository import RepositoryUpdateResult, WeatherRepositoryBase
+from weather_provider_api.routers.weather.sources.cds.client.cds_api_tools import CDSDataSets
 from weather_provider_api.routers.weather.sources.cds.client.era5_utils import (
     Era5UpdateSettings,
     era5_repository_update,
@@ -52,7 +53,7 @@ def last_day_of_repo(self) -> datetime:
         last_day_of_repo = last_day_of_repo.replace(hour=0, minute=0, second=0, microsecond=0)
         return last_day_of_repo
 
-    def update(self) -> RepositoryUpdateResult:
+    def update(self, test_mode: bool = False) -> RepositoryUpdateResult:
         """The update implementation for the ERA5 Land repository.
 
         This function handles all the required actions to update the repository completely, but taking into
@@ -69,14 +70,15 @@ def update(self) -> RepositoryUpdateResult:
         return era5_repository_update(
             Era5UpdateSettings(
                 filename_prefix=self.file_prefix,
-                era5_dataset_to_update_from="reanalysis-era5-land",
+                era5_dataset_to_update_from=CDSDataSets.ERA5LAND,
                 era5_product_type="reanalysis",
                 factor_dictionary=era5land_factors,
                 factors_to_process=[era5land_factors[x] for x in list(era5land_factors.keys())],
                 maximum_runtime_in_minutes=self.runtime_limit,
                 repository_time_range=(self.first_day_of_repo, self.last_day_of_repo),
                 target_storage_location=self.repository_folder,
-            )
+            ),
+            test_mode=test_mode,
         )
 
     def _delete_files_outside_of_scope(self):
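A hedged sketch of exercising an update in test mode. Only the `update(test_mode=...)` signature comes from this PR; how a concrete repository is constructed is not shown here, so the sketch takes one as a parameter.

```python
# Hedged sketch: smoke-testing a repository update in test mode. Only the
# update(test_mode=...) signature is defined by this PR; repository
# construction is left to the caller.
from weather_provider_api.routers.weather.repository.repository import (
    RepositoryUpdateResult,
    WeatherRepositoryBase,
)


def run_update_smoke_test(repository: WeatherRepositoryBase) -> None:
    # test_mode=True limits each month's CDS download to its first day, so
    # the full update pipeline can be exercised without hours of transfer.
    result = repository.update(test_mode=True)
    if result == RepositoryUpdateResult.failure:
        print(f"Update failed for {type(repository).__name__}")
```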
@@ -52,14 +52,17 @@ def last_day_of_repo(self) -> datetime:
         last_day_of_repo = last_day_of_repo.replace(hour=0, minute=0, second=0, microsecond=0)
         return last_day_of_repo
 
-    def update(self) -> RepositoryUpdateResult:
+    def update(self, test_mode: bool) -> RepositoryUpdateResult:
         """The update implementation for the ERA5 Single Levels repository.
 
         This function handles all the required actions to update the repository completely, but taking into
         account its set runtime_limit. If based on the time of completion of other downloaded files this session
         the next file wouldn't complete within the runtime_limit, the update process halts.
         (if no other downloads were made yet, a generous rough estimate is used).
 
+        Args:
+            test_mode: A boolean indicating whether the update process should run in test mode.
+
         Returns:
             A RepositoryUpdateResult value indicating a completion, time-out or failure of the update process
         """
@@ -77,7 +80,8 @@ def update(self) -> RepositoryUpdateResult:
                 maximum_runtime_in_minutes=self.runtime_limit,
                 repository_time_range=(self.first_day_of_repo, self.last_day_of_repo),
                 target_storage_location=self.repository_folder,
-            )
+            ),
+            test_mode=test_mode,
         )
 
     def _delete_files_outside_of_scope(self):
@@ -130,8 +134,11 @@ def _get_file_list_for_period(self, start: datetime, end: datetime):
             file_month = int(file[len_filename_until_date + 5 : len_filename_until_date + 7])
             date_for_filename = datetime(year=file_year, month=file_month, day=15).astimezone(UTC)
 
-
-            if start.replace(day=1) < date_for_filename < datetime(year=end.year, month=end.month, day=28).astimezone(UTC):
+            if (
+                start.replace(day=1)
+                < date_for_filename
+                < datetime(year=end.year, month=end.month, day=28).astimezone(UTC)
+            ):
                 # If the file is within the requested period, save it to the list of filtered files
                 list_of_filtered_files.append(file)
 
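The wrapped comparison above keeps any file whose month overlaps the requested window. A standalone sketch of the same check; the filename layout it mimics (prefix_YYYY_MM.nc) is an assumption for illustration.

```python
# Standalone sketch of the month-window check used by _get_file_list_for_period.
from datetime import datetime

from pytz import UTC


def file_in_period(file_year: int, file_month: int, start: datetime, end: datetime) -> bool:
    # Represent the file by the 15th of its month so that any month touching
    # the [start, end] range is kept.
    date_for_filename = datetime(year=file_year, month=file_month, day=15).astimezone(UTC)
    return (
        start.replace(day=1)
        < date_for_filename
        < datetime(year=end.year, month=end.month, day=28).astimezone(UTC)
    )


start = datetime(2024, 1, 10).astimezone(UTC)
end = datetime(2024, 3, 5).astimezone(UTC)
print(file_in_period(2024, 2, start, end))  # True: February 2024 falls inside the window
```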
10 changes: 6 additions & 4 deletions weather_provider_api/routers/weather/utils/serializers.py
@@ -26,7 +26,7 @@ def file_or_text_response(
     model_id: str,
     request: Union[WeatherContentRequestQuery, WeatherContentRequestMultiLocationQuery],
     coords: List[Tuple[float, float]],
-):
+) -> tuple[ScientificJSONResponse | FileResponse, str | None]:
     if response_format == ResponseFormat.json:
         return json_response(unserialized_data, coords)
     elif response_format == ResponseFormat.json_dataset:
@@ -41,8 +41,8 @@ def file_response(
     source_id: str,
     model_id: str,
     request: WeatherContentRequestQuery,
-    coords: List[Tuple[float, float]],
-):
+    coords: list[tuple[float, float]],
+) -> tuple[FileResponse, str]:
     if response_format == ResponseFormat.netcdf4:
         file_path = to_netcdf4(unserialized_data)
         mime = "application/x-netcdf4"
@@ -64,7 +64,9 @@
 
 
 def generate_filename(source_id: str, model_id: str, request: WeatherContentRequestQuery, extension: str):
-    file_name = f"weather_{source_id}_{model_id}_{request.begin}-{request.end}{extension}".replace(" ", "T")
+    file_name = f"weather_{source_id}_{model_id}_{request.begin}-{request.end}{extension}".replace(" ", "T").replace(
+        ":", ""
+    )
     return file_name
 
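A small worked example of what the extra `.replace(":", "")` buys: colons from ISO timestamps are invalid in Windows file names and awkward in Content-Disposition headers. The literal begin/end strings below stand in for the request fields.

```python
# Worked example of the filename cleanup; literals stand in for request.begin/end.
begin, end = "2024-01-01 00:00:00", "2024-01-02 00:00:00"
source_id, model_id, extension = "cds", "era5land", ".nc"

file_name = f"weather_{source_id}_{model_id}_{begin}-{end}{extension}".replace(" ", "T").replace(":", "")
print(file_name)  # weather_cds_era5land_2024-01-01T000000-2024-01-02T000000.nc
```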