Skip to content

Commit bf7f7b4

Browse files
committed
Test case for all sources
1 parent 190c4a7 commit bf7f7b4

File tree

373 files changed

+3153
-1
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

373 files changed

+3153
-1
lines changed

Dockerfile.save

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# NOTE(review): this file previously contained unresolved merge-conflict markers
# (HEAD vs. 074f51d "Upgrade to bookworm") and stray terminal residue on its first
# line. The conflict is resolved here in favour of the incoming commit, which
# includes everything HEAD changed relative to the common parent (bookworm base,
# --all-groups) plus the Python/uv version bumps and the lockfile check.
# Confirm that Python 3.13 / uv 0.6.8 are the intended versions.
FROM python:3.13-slim-bookworm AS base
# Take the uv/uvx binaries from the published uv image
COPY --from=ghcr.io/astral-sh/uv:0.6.8 /uv /uvx /bin/

LABEL maintainer="Montandon Dev"
LABEL org.opencontainers.image.source="https://github.com/IFRCGo/montandon-etl/"

ENV PYTHONUNBUFFERED=1

ENV UV_COMPILE_BYTECODE=1
ENV UV_LINK_MODE=copy
ENV UV_PROJECT_ENVIRONMENT="/usr/local/"

WORKDIR /code

COPY libs /code/libs

# System packages, dependency sync and clean-up are chained in one RUN instruction;
# uv.lock and pyproject.toml are bind-mounted for this step only.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    apt-get update -y \
    && apt-get install -y --no-install-recommends \
        # Build required packages
        build-essential gcc libc-dev gdal-bin libgdal-dev libproj-dev \
        # Helper packages
        procps \
        wait-for-it \
    && uv lock --locked --offline \
    # FIXME: Add condition to skip dev dependencies
    && uv sync --frozen --no-install-project --all-groups \
    # Clean-up
    && apt-get remove -y gcc libc-dev libproj-dev build-essential libgdal-dev \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

COPY . /code/
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# apps/etl/tests/sources/test_desinventar.py
2+
import pytest
3+
from pathlib import Path
4+
from unittest.mock import patch, MagicMock
5+
from django.conf import settings
6+
from django.test import override_settings
7+
from django.core.serializers import serialize
8+
9+
# Import the models used in assertions
10+
from apps.etl.models import ExtractionData, Transform, PyStacLoadData
11+
12+
@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
@pytest.mark.django_db
def test_handle_extraction_with_mocked_request():
    """
    Test the DesInventar extraction process by mocking the request sent to the extractor.

    ``requests.get`` is patched to return the local XML fixture, the ETL entry
    point is called, and the number of stored ExtractionData / Transform /
    PyStacLoadData rows is checked. The ten most recent PyStacLoadData rows are
    then serialized to ``/code/output/output_desinventar.json``.

    CELERY_TASK_ALWAYS_EAGER is set to True by the ``override_settings``
    decorator, so no additional runtime assignment to ``settings`` is needed.
    """
    # Path to the XML fixture
    xml_file_path = Path('/code/apps/etl/Dataset/Desinventar/DI_export_npl.xml')
    xml_data = xml_file_path.read_text(encoding='utf-8')

    # Fail fast with a parse error if the fixture is not well-formed XML
    import xml.etree.ElementTree as ET
    ET.fromstring(xml_data)

    # Patch 'requests.get' so the fixture is returned instead of a network response
    with patch('requests.get') as mock_get:
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.content = xml_data.encode("utf-8")
        mock_response.headers = {"Content-Type": "application/xml"}
        mock_get.return_value = mock_response

        # Import inside the test function to avoid circular import
        from apps.etl.etl_tasks.desinventar import ext_and_transform_desinventar_data

        # Call the ETL function
        ext_and_transform_desinventar_data()

    # Assertions
    assert ExtractionData.objects.count() == 1
    assert Transform.objects.count() == 1
    assert PyStacLoadData.objects.count() == 3592

    # Fetch the ten most recent rows and serialize them
    latest_data = PyStacLoadData.objects.all().order_by('-id')[:10]
    latest_data_json = serialize('json', latest_data)

    # Save JSON string directly to file
    output_path = Path('/code/output/output_desinventar.json')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(latest_data_json, encoding='utf-8')

    # Final assertion
    assert output_path.exists(), f"Expected output JSON file {output_path} was not created."

apps/etl/tests/sources/test_emdat.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import json
2+
import pytest
3+
from pathlib import Path
4+
from unittest.mock import patch, MagicMock
5+
from django.conf import settings
6+
from django.test import override_settings
7+
from django.core.serializers import serialize
8+
9+
from apps.etl.etl_tasks.emdat import ext_and_transform_emdat_latest_data
10+
from apps.etl.models import ExtractionData, Transform, PyStacLoadData
11+
from pystac_monty.sources.common import MontyDataTransformer
12+
13+
# Override the class-level collection URL at import time with a local path.
# NOTE(review): presumably this makes the transformer read the vendored
# monty-stac-extension examples instead of a remote location — confirm.
# The assignment is global and is not reverted after the test.
MontyDataTransformer.base_collection_url = "/code/libs/pystac-monty/monty-stac-extension/examples"
14+
15+
@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
@pytest.mark.django_db
def test_handle_extraction_with_mocked_request():
    """
    Test the EM-DAT extraction process by mocking the request sent to the extractor.

    ``requests.get`` is patched to return the local JSON fixture, the ETL entry
    point is called, and the number of stored ExtractionData / Transform /
    PyStacLoadData rows is checked. The ten most recent PyStacLoadData rows are
    then written, pretty-printed, to ``/code/output/output_emdat.json``.

    CELERY_TASK_ALWAYS_EAGER is set to True by the ``override_settings``
    decorator, so no additional runtime assignment to ``settings`` is needed.
    """
    json_file_path = Path('/code/apps/etl/Dataset/EM-DAT/EM-DAT.json')

    # Read mock data from file (explicit encoding so decoding does not depend on the locale)
    with open(json_file_path, 'r', encoding='utf-8') as f:
        mock_data = json.load(f)

    # Patch 'requests.get' used inside 'ext_and_transform_emdat_latest_data'
    with patch('requests.get') as mock_get:
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = mock_data  # Mock .json() response
        # Mock .content as the same payload encoded to bytes
        mock_response.content = json.dumps(mock_data).encode('utf-8')
        mock_response.headers = {"Content-Type": "application/json"}

        # Mock requests.get() to return this response
        mock_get.return_value = mock_response

        # Call the function (without parameters) - it will use the patched requests.get
        ext_and_transform_emdat_latest_data()

    # Assertions: check the number of stored records
    assert ExtractionData.objects.count() == 25
    assert Transform.objects.count() == 25
    assert PyStacLoadData.objects.count() == 400

    # Fetch last processed data (latest 10 records)
    latest_data = PyStacLoadData.objects.all().order_by('-id')[:10]
    latest_data_json = serialize('json', latest_data)
    # Round-trip through json so the output file can be pretty-printed
    latest_data_dict = json.loads(latest_data_json)

    # Save the latest processed data to a JSON file
    output_path = Path('/code/output/output_emdat.json')
    output_path.parent.mkdir(parents=True, exist_ok=True)  # Ensure the directory exists

    with open(output_path, 'w', encoding='utf-8') as json_file:
        json.dump(latest_data_dict, json_file, ensure_ascii=False, indent=4)

    # Assert JSON file was created
    assert output_path.exists(), f"Expected output JSON file {output_path} was not created."

apps/etl/tests/sources/test_gidd.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import json
2+
import pytest
3+
from pathlib import Path
4+
from unittest.mock import patch, MagicMock
5+
from django.conf import settings
6+
from django.test import override_settings
7+
from django.core.serializers import serialize
8+
9+
from apps.etl.etl_tasks.gidd import ext_and_transform_gidd_latest_data
10+
from apps.etl.models import ExtractionData, Transform, PyStacLoadData
11+
12+
from pystac_monty.sources.common import MontyDataTransformer
13+
14+
# Override the class-level collection URL at import time with a local path.
# NOTE(review): presumably this makes the transformer read the vendored
# monty-stac-extension examples instead of a remote location — confirm.
# The assignment is global and is not reverted after the test.
MontyDataTransformer.base_collection_url = "/code/libs/pystac-monty/monty-stac-extension/examples"
15+
16+
@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
@pytest.mark.django_db
def test_handle_extraction_with_mocked_request():
    """
    Test the GIDD extraction process by mocking the request sent to the extractor.

    ``requests.get`` is patched to return the local GeoJSON fixture, the ETL
    entry point is called, and the number of stored ExtractionData / Transform /
    PyStacLoadData rows is checked. The ten most recent PyStacLoadData rows are
    then serialized to ``/code/output/output_gidd.json``.

    CELERY_TASK_ALWAYS_EAGER is set to True by the ``override_settings``
    decorator, so no additional runtime assignment to ``settings`` is needed.
    """
    json_file_path = Path('/code/apps/etl/Dataset/IDMC-GIDD/IDMC_GIDD_Internal_Displacement_Disaggregated.geojson')

    # Read mock data from file
    with open(json_file_path, 'r', encoding='utf-8') as f:
        mock_data = json.load(f)

    # Patch 'requests.get' used inside 'ext_and_transform_gidd_latest_data'
    with patch('requests.get') as mock_get:
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = mock_data
        mock_response.content = json.dumps(mock_data).encode("utf-8")
        mock_response.headers = {"Content-Type": "application/geojson"}
        mock_get.return_value = mock_response

        # Call the ETL function (uses patched requests.get)
        ext_and_transform_gidd_latest_data()

    # Assertions
    assert ExtractionData.objects.count() == 1
    assert Transform.objects.count() == 1
    # NOTE(review): expecting zero loaded items looks like it pins a pipeline that
    # produces nothing for this fixture — confirm this is the intended outcome.
    assert PyStacLoadData.objects.count() == 0

    # Fetch the ten most recent rows and serialize them
    latest_data = PyStacLoadData.objects.all().order_by('-id')[:10]
    latest_data_json = serialize('json', latest_data)

    # Save JSON string directly to file
    output_path = Path('/code/output/output_gidd.json')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(latest_data_json, encoding='utf-8')

    # Final assertion
    assert output_path.exists(), f"Expected output JSON file {output_path} was not created."

apps/etl/tests/sources/test_glide.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import json
2+
import pytest
3+
from pathlib import Path
4+
from unittest.mock import patch, MagicMock
5+
from django.conf import settings
6+
from django.test import override_settings
7+
from django.core.serializers import serialize
8+
9+
from apps.etl.etl_tasks.glide import ext_and_transform_glide_latest_data
10+
from apps.etl.models import ExtractionData, Transform, PyStacLoadData
11+
12+
from pystac_monty.sources.common import MontyDataTransformer
13+
14+
# Override the class-level collection URL at import time with a local path.
# NOTE(review): presumably this makes the transformer read the vendored
# monty-stac-extension examples instead of a remote location — confirm.
# The assignment is global and is not reverted after the test.
MontyDataTransformer.base_collection_url = "/code/libs/pystac-monty/monty-stac-extension/examples"
15+
16+
@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
@pytest.mark.django_db
def test_handle_extraction_with_mocked_request():
    """
    Test the GLIDE extraction process by mocking the request sent to the extractor.

    ``requests.get`` is patched to return the local JSON fixture, the ETL entry
    point is called, and the number of stored ExtractionData / Transform /
    PyStacLoadData rows is checked. The ten most recent PyStacLoadData rows are
    then written, pretty-printed, to ``/code/output/output_glide.json``.

    CELERY_TASK_ALWAYS_EAGER is set to True by the ``override_settings``
    decorator, so no additional runtime assignment to ``settings`` is needed.
    """
    json_file_path = Path('/code/apps/etl/Dataset/Glide/Glide.json')

    # Read mock data from file (explicit encoding so decoding does not depend on the locale)
    with open(json_file_path, 'r', encoding='utf-8') as f:
        mock_data = json.load(f)

    # Patch 'requests.get' used inside 'ext_and_transform_glide_latest_data'
    with patch('requests.get') as mock_get:
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = mock_data  # Mock .json() response
        mock_response.content = json.dumps(mock_data).encode("utf-8")  # Mock .content
        mock_response.headers = {"Content-Type": "application/json"}

        # Mock requests.get() to return this response
        mock_get.return_value = mock_response

        # Call the function (without parameters) - it will use the patched requests.get
        ext_and_transform_glide_latest_data()

    # Assertions: check the number of stored records
    assert ExtractionData.objects.count() == 25
    assert Transform.objects.count() == 25
    assert PyStacLoadData.objects.count() == 400

    # Fetch last processed data (latest 10 records)
    latest_data = PyStacLoadData.objects.all().order_by('-id')[:10]
    latest_data_json = serialize('json', latest_data)
    # Round-trip through json so the output file can be pretty-printed
    latest_data_dict = json.loads(latest_data_json)

    # Save the latest processed data to a JSON file
    output_path = Path('/code/output/output_glide.json')
    output_path.parent.mkdir(parents=True, exist_ok=True)  # Ensure the directory exists

    with open(output_path, 'w', encoding='utf-8') as json_file:
        json.dump(latest_data_dict, json_file, ensure_ascii=False, indent=4)

    # Assert JSON file was created
    assert output_path.exists(), f"Expected output JSON file {output_path} was not created."
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# FIXME: the test runs, but no data is stored; the cause is not yet understood.
2+
import pytest
3+
from unittest.mock import patch, MagicMock
4+
from django.conf import settings
5+
from django.test import override_settings
6+
from django.core.serializers import serialize
7+
import csv
8+
import json
9+
from pathlib import Path
10+
11+
from apps.etl.etl_tasks.noaa_IBTrACS import ext_and_transform_ibtracs_latest_data
12+
from apps.etl.models import ExtractionData, Transform, PyStacLoadData
13+
14+
from pystac_monty.sources.common import MontyDataTransformer
15+
16+
# Override the class-level collection URL at import time with a local path.
# NOTE(review): presumably this makes the transformer read the vendored
# monty-stac-extension examples instead of a remote location — confirm.
# The assignment is global and is not reverted after the test.
MontyDataTransformer.base_collection_url = "/code/libs/pystac-monty/monty-stac-extension/examples"
17+
18+
@override_settings(CELERY_TASK_ALWAYS_EAGER=True)
@pytest.mark.django_db
def test_handle_extraction_with_mocked_request():
    """
    Test the IBTRACS extraction process by mocking the request sent to the extractor.

    The local CSV fixture is read into a list of row dictionaries,
    ``requests.get`` is patched to return those rows, the ETL entry point is
    called, and the number of stored ExtractionData / Transform /
    PyStacLoadData rows is checked. The ten most recent PyStacLoadData rows are
    then serialized to ``/code/output/output_ibtracs.json``.

    CELERY_TASK_ALWAYS_EAGER is set to True by the ``override_settings``
    decorator, so no additional runtime assignment to ``settings`` is needed.
    """
    csv_file_path = Path('/code/apps/etl/Dataset/Ibtracs/ibrtacs.csv')

    # Read CSV data into a list of dictionaries
    with open(csv_file_path, 'r', encoding='utf-8') as f:
        csv_data = list(csv.DictReader(f))

    # Patch 'requests.get' used inside 'ext_and_transform_ibtracs_latest_data'
    with patch('requests.get') as mock_get:
        mock_response = MagicMock()
        mock_response.status_code = 200

        # Expose the CSV rows as a JSON-like structure on the mock response.
        # NOTE(review): the body is JSON-encoded while the header advertises
        # text/csv; this mismatch may be why no items get loaded — confirm
        # against what the extractor actually reads.
        mock_response.json.return_value = csv_data
        mock_response.content = json.dumps(csv_data).encode("utf-8")
        mock_response.headers = {"Content-Type": "text/csv"}
        mock_get.return_value = mock_response

        # Call the ETL function (uses patched requests.get)
        ext_and_transform_ibtracs_latest_data()

    # Assertions
    assert ExtractionData.objects.count() == 1
    assert Transform.objects.count() == 1
    # NOTE(review): expecting zero loaded items pins the "nothing stored"
    # behaviour — confirm this is the intended outcome.
    assert PyStacLoadData.objects.count() == 0

    # Fetch the ten most recent rows and serialize them
    latest_data = PyStacLoadData.objects.all().order_by('-id')[:10]
    latest_data_json = serialize('json', latest_data)

    # Save the JSON string to a file
    output_path = Path('/code/output/output_ibtracs.json')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(latest_data_json, encoding='utf-8')

    # Final assertion
    assert output_path.exists(), f"Expected output JSON file {output_path} was not created."

0 commit comments

Comments
 (0)