Skip to content

Commit 353f79f

Browse files
authored
Merge pull request #33 from bedrock-engineer/test-kaitak-hk-notebook-bed-26
Test that the GeoPackage that is the output of the HK example notebook is correct
2 parents 70ca133 + 94f021f commit 353f79f

File tree

2 files changed

+88
-8
lines changed

2 files changed

+88
-8
lines changed
0 Bytes
Binary file not shown.

tests/test_examples/test_hk_kaitak_ags3_to_brgi_geodb.py

Lines changed: 88 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,25 @@
11
import os
22
import shutil
3+
import sqlite3
34
import subprocess
4-
import sys
55
from pathlib import Path
66
from tempfile import TemporaryDirectory
77

8+
import geopandas as gpd
9+
import pandas as pd
10+
811

912
def test_kaitak_ags3_notebook_runs_and_creates_gpkg(examples_dir):
13+
"""Tests the Kai Tak, Hong Kong AGS 3 example marimo notebook.
14+
15+
Tests that the `hk_kaitak_ags3_to_brgi_geodb.py` marimo notebook:
16+
- Runs successfully as a script using `uvx uv run` with the Python version and
17+
dependencies specified in the PEP 723 inline script metadata.
18+
- Creates a valid GeoPackage
19+
- That the GeoPackage contains the expected tables
20+
- That the Project, Location, Sample, InSitu_GEOL, InSitu_ISPT and InSitu_WETH
21+
tables have the expected number of rows.
22+
"""
1023
notebook_dir = examples_dir / "hk_kaitak_ags3"
1124
notebook_path = notebook_dir / "hk_kaitak_ags3_to_brgi_geodb.py"
1225
gpkg_output_path = notebook_dir / "kaitak_gi.gpkg"
@@ -19,14 +32,15 @@ def test_kaitak_ags3_notebook_runs_and_creates_gpkg(examples_dir):
1932
# to the one created when executing the notebook.
2033
# And to put back to the original state at the end of the test.
2134
with TemporaryDirectory() as temp_dir:
22-
temp_gpkg_path = Path(temp_dir) / "temp_kaitak_gi.gpkg"
23-
shutil.move(gpkg_output_path, temp_gpkg_path)
35+
temp_original_gpkg_path = Path(temp_dir) / "temp_kaitak_gi.gpkg"
36+
shutil.move(gpkg_output_path, temp_original_gpkg_path)
2437

25-
print(f"Running: `python {notebook_path}`\n")
2638
# Run the notebook as a script
2739
# TODO: implement logging
2840
# NOTE: The env (environment variables) and encoding are required for running
29-
# the notebook as a script from both Windows and Linux. Wihtout: UnicodeDecodeError
41+
# the notebook as a script from both Windows and Linux. Without => UnicodeDecodeError
42+
# NOTE: `uvx uv run` runs the marimo notebook as a script in a temporary environment,
43+
# with the Python version and dependencies specified in the PEP 723 inline script metadata.
3044
env = os.environ.copy()
3145
env["PYTHONIOENCODING"] = "utf-8"
3246
result = subprocess.run(
@@ -40,7 +54,7 @@ def test_kaitak_ags3_notebook_runs_and_creates_gpkg(examples_dir):
4054

4155
# Check that the script ran successfully
4256
assert result.returncode == 0, (
43-
f"\n📛 Running `uvx run marimo notebook.py` failed with code {result.returncode}\n"
57+
f"📛 Running `uvx run marimo notebook.py` failed with code {result.returncode}\n"
4458
f"📄 STDOUT:\n{result.stdout}\n"
4559
f"⚠️ STDERR:\n{result.stderr}"
4660
)
@@ -50,10 +64,76 @@ def test_kaitak_ags3_notebook_runs_and_creates_gpkg(examples_dir):
5064
f"The expected GeoPackage {gpkg_output_path} was not created."
5165
)
5266

53-
# TODO: write some logic to compare the original and new GeoPackages.
67+
# Compare the original and new GeoPackages and check the number of rows
68+
# in the important tables.
69+
conn_original = sqlite3.connect(temp_original_gpkg_path)
70+
conn_output = sqlite3.connect(gpkg_output_path)
71+
72+
tables_original = conn_original.execute(
73+
"SELECT name FROM sqlite_master WHERE type='table';"
74+
).fetchall()
75+
conn_original.close()
76+
tables_output = conn_output.execute(
77+
"SELECT name FROM sqlite_master WHERE type='table';"
78+
).fetchall()
79+
conn_output.close()
80+
81+
assert tables_original == tables_output, (
82+
f"The original GeoPackage {temp_original_gpkg_path.name} and the output "
83+
f"GeoPackage {gpkg_output_path.name} have different tables:\n"
84+
f"Original: {tables_original}\n"
85+
f"Output: {tables_output}"
86+
)
87+
88+
important_tables = [
89+
{
90+
"table_name": "Project",
91+
"no_rows": 88,
92+
},
93+
{
94+
"table_name": "Location",
95+
"no_rows": 754,
96+
},
97+
{
98+
"table_name": "Sample",
99+
"no_rows": 17_774,
100+
},
101+
{
102+
"table_name": "InSitu_GEOL",
103+
"no_rows": 7_764,
104+
},
105+
{
106+
"table_name": "InSitu_ISPT",
107+
"no_rows": 3_986,
108+
},
109+
{
110+
"table_name": "InSitu_WETH",
111+
"no_rows": 3_928,
112+
},
113+
]
114+
for table in important_tables:
115+
gdf_output = gpd.read_file(gpkg_output_path, layer=table["table_name"])
116+
assert len(gdf_output) == table["no_rows"], (
117+
f"The output GeoPackage {gpkg_output_path.name} table {table['table_name']} "
118+
f"has {len(gdf_output)} rows instead of {table['no_rows']}."
119+
)
120+
gdf_original = gpd.read_file(
121+
temp_original_gpkg_path, layer=table["table_name"]
122+
)
123+
pd.testing.assert_frame_equal(
124+
gdf_original, gdf_output, check_exact=False, rtol=1e-5
125+
)
126+
# It's also possible to assert that GIS geometries are not exactly equal.
127+
# However, when testing the equality of GeoDataFrames with pandas, the GIS
128+
# geometry are compared precisely, because the geometry is converted to a
129+
# WKT string and compared as strings. Therefore, if a less precise comparison
130+
# of GIS geometries is necessary, the assertion above needs changing too.
131+
# gpd.testing.assert_geoseries_equal(
132+
# gdf_original, gdf_output, check_less_precise=False
133+
# )
54134

55135
# Remove the newly generated kaitak_gi.gpkg
56136
os.remove(gpkg_output_path)
57137
# Place back the original kaitak_gi.gpkg from the temporary directory
58138
# to its original location.
59-
shutil.move(temp_gpkg_path, gpkg_output_path)
139+
shutil.move(temp_original_gpkg_path, gpkg_output_path)

0 commit comments

Comments
 (0)