From d8f1dee7c8b5a2cbd042e5a92c2d1fc7f3898c8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gratien=20D=C3=A9sormeaux?= Date: Wed, 8 Jan 2025 16:38:20 +0100 Subject: [PATCH 1/5] Catch connection errors and incorporate them in retry strategy --- src/meteole/clients.py | 83 +++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 38 deletions(-) diff --git a/src/meteole/clients.py b/src/meteole/clients.py index 3610562..2d27b62 100644 --- a/src/meteole/clients.py +++ b/src/meteole/clients.py @@ -59,7 +59,7 @@ class MeteoFranceClient(BaseClient): TOKEN_URL: str = "https://portail-api.meteofrance.fr/token" GET_TOKEN_TIMEOUT_SEC: int = 10 INVALID_JWT_ERROR_CODE: str = "900901" - RETRY_DELAY_SEC: int = 5 + RETRY_DELAY_SEC: int = 10 def __init__( self, @@ -109,43 +109,50 @@ def get(self, path: str, *, params: dict[str, Any] | None = None, max_retries: i while attempt < max_retries: # HTTP GET request - resp: Response = self._session.get(url, params=params, verify=self._verify) - - if resp.status_code == HttpStatus.OK: - logger.debug("Successful request") - return resp - - elif self._is_token_expired(resp): - logger.info("Token expired, requesting a new one") - - # Refresh the cached token - self._token = self._get_token() - - # Reconnect with the new token - self._connect() - - elif resp.status_code == HttpStatus.FORBIDDEN: - logger.error("Access forbidden") - raise GenericMeteofranceApiError(resp.text) - - elif resp.status_code == HttpStatus.BAD_REQUEST: - logger.error("Parameter error") - raise GenericMeteofranceApiError(resp.text) - - elif resp.status_code == HttpStatus.NOT_FOUND: - logger.error("Missing data") - raise MissingDataError(resp.text) - - elif ( - resp.status_code == HttpStatus.BAD_GATEWAY - or resp.status_code == HttpStatus.UNAVAILABLE - or resp.status_code == HttpStatus.GATEWAY_TIMEOUT - ): - logger.error("Service not available") - time.sleep(self.RETRY_DELAY_SEC) - attempt += 1 - logger.info(f"Retrying... Attempt {attempt} of {max_retries}") - continue + try: + resp: Response = self._session.get(url, params=params, verify=self._verify) + + if resp.status_code == HttpStatus.OK: + logger.debug("Successful request") + return resp + + elif self._is_token_expired(resp): + logger.info("Token expired, requesting a new one") + + # Refresh the cached token + self._token = self._get_token() + + # Reconnect with the new token + self._connect() + + elif resp.status_code == HttpStatus.FORBIDDEN: + logger.error("Access forbidden") + raise GenericMeteofranceApiError(resp.text) + + elif resp.status_code == HttpStatus.BAD_REQUEST: + logger.error("Parameter error") + raise GenericMeteofranceApiError(resp.text) + + elif resp.status_code == HttpStatus.NOT_FOUND: + logger.error("Missing data") + raise MissingDataError(resp.text) + + elif ( + resp.status_code == HttpStatus.BAD_GATEWAY + or resp.status_code == HttpStatus.UNAVAILABLE + or resp.status_code == HttpStatus.GATEWAY_TIMEOUT + ): + logger.error("Service not available") + + except ConnectionError as e: + logger.warning(f"Connection error : {e}.") + + # Wait before retrying + attempt += 1 + waiting_time = attempt * self.RETRY_DELAY_SEC + logger.info(f"Retrying (attempt {attempt}/{max_retries}) - waiting {waiting_time}s before retrying...") + time.sleep(waiting_time) + continue raise GenericMeteofranceApiError(f"Failed to get a successful response from API after {attempt} retries") From 99356eb6bcde9036a7a4128c72a9086fa32ab575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gratien=20D=C3=A9sormeaux?= Date: Wed, 8 Jan 2025 17:01:06 +0100 Subject: [PATCH 2/5] Use ConnectionError from requests --- src/meteole/clients.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/meteole/clients.py b/src/meteole/clients.py index 2d27b62..698b323 100644 --- a/src/meteole/clients.py +++ b/src/meteole/clients.py @@ -144,7 +144,7 @@ def get(self, path: str, *, params: dict[str, Any] | None = None, max_retries: i ): logger.error("Service not available") - except ConnectionError as e: + except requests.exceptions.ConnectionError as e: logger.warning(f"Connection error : {e}.") # Wait before retrying From ecb0c6f651057c06435e4fed0c26abae751967b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gratien=20D=C3=A9sormeaux?= Date: Wed, 8 Jan 2025 17:53:38 +0100 Subject: [PATCH 3/5] Remove parent directory after reading grib file --- src/meteole/forecast.py | 46 ++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/meteole/forecast.py b/src/meteole/forecast.py index 6050a08..cfb9400 100644 --- a/src/meteole/forecast.py +++ b/src/meteole/forecast.py @@ -1,7 +1,6 @@ from __future__ import annotations import datetime as dt -import glob import logging import os import re @@ -377,11 +376,6 @@ def _transform_grib_to_df(self) -> pd.DataFrame: ds = xr.open_dataset(self.filepath, engine="cfgrib") df = ds.to_dataframe().reset_index() - os.remove(str(self.filepath)) - idx_files = glob.glob(f"{self.filepath}.*.idx") - for idx_file in idx_files: - os.remove(idx_file) - return df def _get_data_single_forecast( @@ -407,7 +401,7 @@ def _get_data_single_forecast( pd.DataFrame: The forecast for the specified time. """ - self._get_coverage_file( + filepath = self._get_coverage_file( coverage_id=coverage_id, height=height, pressure=pressure, @@ -418,6 +412,8 @@ def _get_data_single_forecast( df = self._transform_grib_to_df() + self._remove_coverage_files(filepath) + df.drop(columns=["surface", "valid_time"], errors="ignore", inplace=True) df.rename( columns={ @@ -453,6 +449,42 @@ def _get_data_single_forecast( return df + def _remove_coverage_files(self, filepath: Path) -> None: + """ + Removes a coverage file and its associated index files (.idx). + + If the parent directory becomes empty after file removal, it deletes the parent directory. + + Args: + filepath (Path): Path to the main coverage file to be removed. + + Raises: + FileNotFoundError: If the specified file does not exist. + PermissionError: If the file or directory cannot be removed due to insufficient permissions. + """ + # Ensure filepath is a Path object + filepath = Path(filepath) + + # remove file + os.remove(str(filepath)) + # Remove the main file + if filepath.exists(): + filepath.unlink() + + # remove potential idx files + idx_files = filepath.parent.glob(f"{filepath.name}.*.idx") + for idx_file in idx_files: + os.remove(idx_file) + + # Remove the parent directory if it's empty + parent_dir = filepath.parent + try: + if not any(parent_dir.iterdir()): # Check if the directory is empty + parent_dir.rmdir() + except OSError as e: + # Handle potential errors (e.g., directory in use or permissions issue) + raise PermissionError(f"Failed to remove directory '{parent_dir}': {e}") from e + def _get_coverage_file( self, coverage_id: str, From 6f314ae330225be206d1ee3aaaf6725453170112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gratien=20D=C3=A9sormeaux?= Date: Thu, 9 Jan 2025 13:29:07 +0100 Subject: [PATCH 4/5] Fix tests: add mock for new func --- tests/test_forecasts.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_forecasts.py b/tests/test_forecasts.py index 9233b5a..51c9751 100644 --- a/tests/test_forecasts.py +++ b/tests/test_forecasts.py @@ -187,7 +187,10 @@ def test_get_coverage_file(self, mock_get_request, mock_get_capabilities): @patch("meteole._arome.AromeForecast.get_capabilities") @patch("meteole._arome.AromeForecast._transform_grib_to_df") @patch("meteole._arome.AromeForecast._get_coverage_file") - def test_get_data_single_forecast(self, mock_get_coverage_file, mock_transform_grib_to_df, mock_get_capabilities): + @patch("meteole._arome.AromeForecast._remove_coverage_files") + def test_get_data_single_forecast( + self, mock_remove_coverage_files, mock_get_coverage_file, mock_transform_grib_to_df, mock_get_capabilities + ): mock_transform_grib_to_df.return_value = pd.DataFrame({"data": [1, 2, 3]}) forecast = AromeForecast( @@ -210,8 +213,9 @@ def test_get_data_single_forecast(self, mock_get_coverage_file, mock_transform_g @patch("meteole._arome.AromeForecast.get_capabilities") @patch("meteole._arome.AromeForecast._transform_grib_to_df") @patch("meteole._arome.AromeForecast._get_coverage_file") + @patch("meteole._arome.AromeForecast._remove_coverage_files") def test_get_data_single_forecast_with_height( - self, mock_get_coverage_file, mock_transform_grib_to_df, mock_get_capabilities + self, mock_remove_coverage_files, mock_get_coverage_file, mock_transform_grib_to_df, mock_get_capabilities ): mock_transform_grib_to_df.return_value = pd.DataFrame({"data": [1, 2, 3], "heightAboveGround": ["2", "2", "2"]}) From 5d8b440fd8369207a9fef5df25468e888f6272b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gratien=20D=C3=A9sormeaux?= Date: Thu, 9 Jan 2025 13:34:49 +0100 Subject: [PATCH 5/5] Fix tests: remove useless folders created during tests --- tests/test_forecasts.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_forecasts.py b/tests/test_forecasts.py index 51c9751..5234440 100644 --- a/tests/test_forecasts.py +++ b/tests/test_forecasts.py @@ -1,3 +1,4 @@ +import os import unittest from pathlib import Path from unittest.mock import MagicMock, patch @@ -169,7 +170,7 @@ def test_get_coverage_file(self, mock_get_request, mock_get_capabilities): ) coverage_id = "coverage_1" - forecast._get_coverage_file( + path = forecast._get_coverage_file( coverage_id=coverage_id, height=2, forecast_horizon_in_seconds=0, @@ -177,12 +178,12 @@ def test_get_coverage_file(self, mock_get_request, mock_get_capabilities): long=(-12, 16), ) - import os - expected_path = Path(os.getcwd()) / coverage_id / "2m_0Z_37.5-55.4_-12-16.grib" self.assertTrue(expected_path.exists()) + self.assertTrue(expected_path == path) - expected_path.unlink() + # remove the folder created in _get_coverage_file + forecast._remove_coverage_files(path) @patch("meteole._arome.AromeForecast.get_capabilities") @patch("meteole._arome.AromeForecast._transform_grib_to_df")