IFRCGo
diff --git a/‎Dockerfile.save
Lines changed: 49 additions & 0 deletions b/‎Dockerfile.save
Lines changed: 49 additions & 0 deletions
diff --git a/‎apps/etl/tests/sources/test_desinventar.py
Lines changed: 71 additions & 0 deletions b/‎apps/etl/tests/sources/test_desinventar.py
Lines changed: 71 additions & 0 deletions
diff --git a/‎apps/etl/tests/sources/test_emdat.py
Lines changed: 65 additions & 0 deletions b/‎apps/etl/tests/sources/test_emdat.py
Lines changed: 65 additions & 0 deletions
diff --git a/‎apps/etl/tests/sources/test_gidd.py
Lines changed: 62 additions & 0 deletions b/‎apps/etl/tests/sources/test_gidd.py
Lines changed: 62 additions & 0 deletions
diff --git a/‎apps/etl/tests/sources/test_glide.py
Lines changed: 63 additions & 0 deletions b/‎apps/etl/tests/sources/test_glide.py
Lines changed: 63 additions & 0 deletions
diff --git a/‎apps/etl/tests/sources/test_ibtracs.py
Lines changed: 64 additions & 0 deletions b/‎apps/etl/tests/sources/test_ibtracs.py
Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,49 @@
+0;9u<<<<<<< HEAD
+FROM python:3.12-slim-bookworm AS base
+COPY --from=ghcr.io/astral-sh/uv:0.5.29 /uv /uvx /bin/
+||||||| parent of 074f51d (Upgrade to bookworm)
+FROM python:3.12-slim-bullseye AS base
+COPY --from=ghcr.io/astral-sh/uv:0.5.29 /uv /uvx /bin/
+=======
+FROM python:3.13-slim-bookworm AS base
+COPY --from=ghcr.io/astral-sh/uv:0.6.8 /uv /uvx /bin/
+>>>>>>> 074f51d (Upgrade to bookworm)
+
+LABEL maintainer="Montandon Dev"
+LABEL org.opencontainers.image.source="https://github.com/IFRCGo/montandon-etl/"
+
+ENV PYTHONUNBUFFERED=1
+
+ENV UV_COMPILE_BYTECODE=1
+ENV UV_LINK_MODE=copy
+ENV UV_PROJECT_ENVIRONMENT="/usr/local/"
+
+WORKDIR /code
+
+COPY libs /code/libs
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    apt-get update -y \
+    && apt-get install -y --no-install-recommends \
+        # Build required packages
+        build-essential gcc libc-dev gdal-bin libgdal-dev libproj-dev \
+        # Helper packages
+        procps \
+        wait-for-it \
+<<<<<<< HEAD
+    && uv sync --frozen --no-install-project --all-groups \
+||||||| parent of 074f51d (Upgrade to bookworm)
+    && uv sync --frozen --no-install-project  --no-dev \
+=======
+    && uv lock --locked --offline \
+        # FIXME: Add condition to skip dev dependencies
+        && uv sync --frozen --no-install-project --all-groups \
+>>>>>>> 074f51d (Upgrade to bookworm)
+    # Clean-up
+    && apt-get remove -y gcc libc-dev libproj-dev build-essential libgdal-dev  \
+    && apt-get autoremove -y \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY . /code/
@@ -0,0 +1,71 @@
+# apps/etl/tests/sources/test_desinventar.py
+import pytest
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+from django.conf import settings
+from django.test import override_settings
+from django.core.serializers import serialize
+
+# Import the models used in assertions
+from apps.etl.models import ExtractionData, Transform, PyStacLoadData
+
+@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
+@pytest.mark.django_db
+def test_handle_extraction_with_mocked_request():
+    """
+    Test the GIDD extraction process by mocking the request sent to the extractor.
+    Ensures that Celery tasks run synchronously.
+    """
+    settings.CELERY_TASK_ALWAYS_EAGER = True
+
+    # Path to XML file
+    json_file_path = Path('/code/apps/etl/Dataset/Desinventar/DI_export_npl.xml')
+
+    # Read mock data from XML file
+    with open(json_file_path, 'r', encoding='utf-8') as f:
+        xml_data = f.read()
+
+    # Parse XML
+    import xml.etree.ElementTree as ET
+    root = ET.fromstring(xml_data)
+
+    # Convert XML data to a structure that can be used (optional - not used later)
+    mock_data = []
+    for item in root.findall('.//data_item'):  # Adjust XPath as needed
+        data_dict = {
+            'field1': item.find('field1').text if item.find('field1') is not None else None,
+            'field2': item.find('field2').text if item.find('field2') is not None else None,
+        }
+        mock_data.append(data_dict)
+
+    # Patch 'requests.get'
+    with patch('requests.get') as mock_get:
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.content = xml_data.encode("utf-8")
+        mock_response.headers = {"Content-Type": "application/xml"}
+        mock_get.return_value = mock_response
+
+        # Import inside the test function to avoid circular import
+        from apps.etl.etl_tasks.desinventar import ext_and_transform_desinventar_data
+
+        # Call the ETL function
+        ext_and_transform_desinventar_data()
+
+    # Assertions
+    assert ExtractionData.objects.count() == 1
+    assert Transform.objects.count() == 1
+    assert PyStacLoadData.objects.count() == 3592
+
+    # Fetch latest data
+    latest_data = PyStacLoadData.objects.all().order_by('-id')[:10]
+    latest_data_json = serialize('json', latest_data)
+
+    # Save JSON string directly to file
+    output_path = Path('/code/output/output_desinventar.json')
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, 'w', encoding='utf-8') as json_file:
+        json_file.write(latest_data_json)
+
+    # Final assertion
+    assert output_path.exists(), f"Expected output JSON file {output_path} was not created."
@@ -0,0 +1,65 @@
+import json
+import pytest
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+from django.conf import settings
+from django.test import override_settings
+from django.core.serializers import serialize
+
+from apps.etl.etl_tasks.emdat import ext_and_transform_emdat_latest_data
+from apps.etl.models import ExtractionData, Transform, PyStacLoadData
+from pystac_monty.sources.common import MontyDataTransformer
+
+MontyDataTransformer.base_collection_url = "/code/libs/pystac-monty/monty-stac-extension/examples"
+
+@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
+@pytest.mark.django_db
+def test_handle_extraction_with_mocked_request():
+    """
+    Test the GIDD extraction process by mocking the request sent to the extractor.
+    Ensures that Celery tasks run synchronously.
+    """
+    settings.CELERY_TASK_ALWAYS_EAGER = True  # Ensure Celery tasks run synchronously in tests
+
+    json_file_path = Path('/code/apps/etl/Dataset/EM-DAT/EM-DAT.json')
+
+    # Read mock data from file
+    with open(json_file_path, 'r') as f:
+        mock_data = json.load(f)
+        print("Mock Data:", mock_data)  # Check if data is correct
+
+    # Patch 'requests.get' used inside 'ext_and_transform_emdat_latest_data'
+    with patch('requests.get') as mock_get:
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = mock_data  # Mock .json() response
+
+        # Ensure that content is also correctly mocked (return valid JSON as bytes)
+        mock_response.content = json.dumps(mock_data).encode('utf-8')  # Mock .content
+        mock_response.headers = {"Content-Type": "application/json"}
+
+        # Mock requests.get() to return this response
+        mock_get.return_value = mock_response
+
+        # Call the function (without parameters) - it will use the patched requests.get
+        ext_and_transform_emdat_latest_data()
+
+    # Assertions: Check if data was correctly extracted and stored
+    assert ExtractionData.objects.count() == 25
+    assert Transform.objects.count() == 25
+    assert PyStacLoadData.objects.count() == 400  # Ensure expected number of records
+
+    # Fetch last processed data (latest 10 records)
+    latest_data = PyStacLoadData.objects.all().order_by('-id')[:10]
+    latest_data_json = serialize('json', latest_data)  # Convert queryset to JSON format
+    latest_data_dict = json.loads(latest_data_json)  # Convert JSON string to dictionary
+
+    # Save the latest processed data to a JSON file
+    output_path = Path('/code/output/output_emdat.json')
+    output_path.parent.mkdir(parents=True, exist_ok=True)  # Ensure the directory exists
+
+    with open(output_path, 'w', encoding='utf-8') as json_file:
+        json.dump(latest_data_dict, json_file, ensure_ascii=False, indent=4)
+
+    # Assert JSON file was created
+    assert output_path.exists(), f"Expected output JSON file {output_path} was not created."
@@ -0,0 +1,62 @@
+import json
+import pytest
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+from django.conf import settings
+from django.test import override_settings
+from django.core.serializers import serialize
+
+from apps.etl.etl_tasks.gidd import ext_and_transform_gidd_latest_data
+from apps.etl.models import ExtractionData, Transform, PyStacLoadData
+
+from pystac_monty.sources.common import MontyDataTransformer
+
+MontyDataTransformer.base_collection_url = "/code/libs/pystac-monty/monty-stac-extension/examples"
+
+@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
+@pytest.mark.django_db
+def test_handle_extraction_with_mocked_request():
+    """
+    Test the GIDD extraction process by mocking the request sent to the extractor.
+    Ensures that Celery tasks run synchronously.
+    """
+    settings.CELERY_TASK_ALWAYS_EAGER = True
+
+    json_file_path = Path('/code/apps/etl/Dataset/IDMC-GIDD/IDMC_GIDD_Internal_Displacement_Disaggregated.geojson')
+
+    # Read mock data from file
+    with open(json_file_path, 'r', encoding='utf-8') as f:
+        mock_data = json.load(f)
+
+    # Patch 'requests.get' used inside 'ext_and_transform_gidd_latest_data'
+    with patch('requests.get') as mock_get:
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = mock_data
+        mock_response.content = json.dumps(mock_data).encode("utf-8")
+        mock_response.headers = {"Content-Type": "application/geojson"}
+        mock_get.return_value = mock_response
+
+        # Call the ETL function (uses patched requests.get)
+        ext_and_transform_gidd_latest_data()
+
+    # Assertions
+    assert ExtractionData.objects.count() == 1
+    assert Transform.objects.count() == 1
+    assert PyStacLoadData.objects.count() == 0
+
+    # Fetch latest data
+    latest_data = PyStacLoadData.objects.all().order_by('-id')[:10]
+    latest_data_json = serialize('json', latest_data)
+
+    # Debug (optional): print preview of JSON if needed
+    # print("Serialized output:", latest_data_json[:300])
+
+    # Save JSON string directly to file
+    output_path = Path('/code/output/output_gidd.json')
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, 'w', encoding='utf-8') as json_file:
+        json_file.write(latest_data_json)
+
+    # Final assertion
+    assert output_path.exists(), f"Expected output JSON file {output_path} was not created."
@@ -0,0 +1,63 @@
+import json
+import pytest
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+from django.conf import settings
+from django.test import override_settings
+from django.core.serializers import serialize
+
+from apps.etl.etl_tasks.glide import ext_and_transform_glide_latest_data
+from apps.etl.models import ExtractionData, Transform, PyStacLoadData
+
+from pystac_monty.sources.common import MontyDataTransformer
+
+MontyDataTransformer.base_collection_url = "/code/libs/pystac-monty/monty-stac-extension/examples"
+
+@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
+@pytest.mark.django_db
+def test_handle_extraction_with_mocked_request():
+    """
+    Test the GIDD extraction process by mocking the request sent to the extractor.
+    Ensures that Celery tasks run synchronously.
+    """
+    settings.CELERY_TASK_ALWAYS_EAGER = True  # Ensure Celery tasks run synchronously in tests
+
+    json_file_path = Path('/code/apps/etl/Dataset/Glide/Glide.json')
+
+    # Read mock data from file
+    with open(json_file_path, 'r') as f:
+        mock_data = json.load(f)
+
+    # Patch 'requests.get' used inside 'ext_and_transform_gidd_latest_data'
+    with patch('requests.get') as mock_get:
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = mock_data  # Mock .json() response
+        mock_response.content = json.dumps(mock_data).encode("utf-8")  # Mock .content
+        mock_response.headers = {"Content-Type": "application/json"}
+
+        # Mock requests.get() to return this response
+        mock_get.return_value = mock_response
+
+        # Call the function (without parameters) - it will use the patched requests.get
+        ext_and_transform_glide_latest_data()
+
+    # Assertions: Check if data was correctly extracted and stored
+    assert ExtractionData.objects.count() == 25
+    assert Transform.objects.count() == 25
+    assert PyStacLoadData.objects.count() == 400  # Ensure expected number of records
+
+    # Fetch last processed data (latest 10 records)
+    latest_data = PyStacLoadData.objects.all().order_by('-id')[:10]
+    latest_data_json = serialize('json', latest_data)  # Convert queryset to JSON format
+    latest_data_dict = json.loads(latest_data_json)  # Convert JSON string to dictionary
+
+    # Save the latest processed data to a JSON file
+    output_path = Path('/code/output/output_glide.json')
+    output_path.parent.mkdir(parents=True, exist_ok=True)  # Ensure the directory exists
+
+    with open(output_path, 'w', encoding='utf-8') as json_file:
+        json.dump(latest_data_dict, json_file, ensure_ascii=False, indent=4)
+
+    # Assert JSON file was created
+    assert output_path.exists(), f"Expected output JSON file {output_path} was not created."
@@ -0,0 +1,64 @@
+#ERROR RUnning but the data not stored or idk what is happening
+import pytest
+from unittest.mock import patch, MagicMock
+from django.conf import settings
+from django.test import override_settings
+from django.core.serializers import serialize
+import csv
+import json
+from pathlib import Path
+
+from apps.etl.etl_tasks.noaa_IBTrACS import ext_and_transform_ibtracs_latest_data
+from apps.etl.models import ExtractionData, Transform, PyStacLoadData
+
+from pystac_monty.sources.common import MontyDataTransformer
+
+MontyDataTransformer.base_collection_url = "/code/libs/pystac-monty/monty-stac-extension/examples"
+
+@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
+@pytest.mark.django_db
+def test_handle_extraction_with_mocked_request():
+    """
+    Test the IBTRACS extraction process by mocking the request sent to the extractor.
+    Ensures that Celery tasks run synchronously.
+    """
+    settings.CELERY_TASK_ALWAYS_EAGER = True
+
+    csv_file_path = Path('/code/apps/etl/Dataset/Ibtracs/ibrtacs.csv')
+
+    # Read CSV data into a list of dictionaries
+    with open(csv_file_path, 'r', encoding='utf-8') as f:
+        csv_reader = csv.DictReader(f)
+        csv_data = [row for row in csv_reader]
+
+    # Patch 'requests.get' used inside 'ext_and_transform_ibtracs_latest_data'
+    with patch('requests.get') as mock_get:
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+
+        # Convert CSV data into a JSON-like structure for the mock response
+        mock_response.json.return_value = csv_data
+        mock_response.content = json.dumps(csv_data).encode("utf-8")
+        mock_response.headers = {"Content-Type": "text/csv"}
+        mock_get.return_value = mock_response
+
+        # Call the ETL function (uses patched requests.get)
+        ext_and_transform_ibtracs_latest_data()
+
+    # Assertions
+    assert ExtractionData.objects.count() == 1
+    assert Transform.objects.count() == 1
+    assert PyStacLoadData.objects.count() == 0
+
+    # Fetch the latest data
+    latest_data = PyStacLoadData.objects.all().order_by('-id')[:10]
+    latest_data_json = serialize('json', latest_data)
+
+    # Save the JSON string to a file
+    output_path = Path('/code/output/output_ibtracs.json')
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, 'w', encoding='utf-8') as json_file:
+        json_file.write(latest_data_json)
+
+    # Final assertion
+    assert output_path.exists(), f"Expected output JSON file {output_path} was not created."