11import os
22import shutil
3+ import sqlite3
34import subprocess
4- import sys
55from pathlib import Path
66from tempfile import TemporaryDirectory
77
8+ import geopandas as gpd
9+ import pandas as pd
10+
811
912def test_kaitak_ags3_notebook_runs_and_creates_gpkg (examples_dir ):
13+ """Tests the Kai Tak, Hong Kong AGS 3 example marimo notebook.
14+
15+ Tests that the `hk_kaitak_ags3_to_brgi_geodb.py` marimo notebook:
16+ - Runs successfully as a script using `uvx uv run` with the Python version and
17+ dependencies specified in the PEP 723 inline script metadata.
18+ - Creates a valid GeoPackage.
19+ - Creates a GeoPackage that contains the expected tables.
20+ - Creates Project, Location, Sample, InSitu_GEOL, InSitu_ISPT and InSitu_WETH
21+ tables that have the expected number of rows.
22+ """
1023 notebook_dir = examples_dir / "hk_kaitak_ags3"
1124 notebook_path = notebook_dir / "hk_kaitak_ags3_to_brgi_geodb.py"
1225 gpkg_output_path = notebook_dir / "kaitak_gi.gpkg"
@@ -19,14 +32,15 @@ def test_kaitak_ags3_notebook_runs_and_creates_gpkg(examples_dir):
1932 # to the one created when executing the notebook.
2033 # And to put back to the original state at the end of the test.
2134 with TemporaryDirectory () as temp_dir :
22- temp_gpkg_path = Path (temp_dir ) / "temp_kaitak_gi.gpkg"
23- shutil .move (gpkg_output_path , temp_gpkg_path )
35+ temp_original_gpkg_path = Path (temp_dir ) / "temp_kaitak_gi.gpkg"
36+ shutil .move (gpkg_output_path , temp_original_gpkg_path )
2437
25- print (f"Running: `python { notebook_path } `\n " )
2638 # Run the notebook as a script
2739 # TODO: implement logging
2840 # NOTE: The env (environment variables) and encoding are required for running
29- # the notebook as a script from both Windows and Linux. Wihtout: UnicodeDecodeError
41+ # the notebook as a script from both Windows and Linux. Without => UnicodeDecodeError
42+ # NOTE: `uvx uv run` runs the marimo notebook as a script in a temporary environment,
43+ # with the Python version and dependencies specified in the PEP 723 inline script metadata.
3044 env = os .environ .copy ()
3145 env ["PYTHONIOENCODING" ] = "utf-8"
3246 result = subprocess .run (
@@ -40,7 +54,7 @@ def test_kaitak_ags3_notebook_runs_and_creates_gpkg(examples_dir):
4054
4155 # Check that the script ran successfully
4256 assert result .returncode == 0 , (
43- f"\n 📛 Running `uvx run marimo notebook.py` failed with code { result .returncode } \n "
57+ f"📛 Running `uvx run marimo notebook.py` failed with code { result .returncode } \n "
4458 f"📄 STDOUT:\n { result .stdout } \n "
4559 f"⚠️ STDERR:\n { result .stderr } "
4660 )
@@ -50,10 +64,76 @@ def test_kaitak_ags3_notebook_runs_and_creates_gpkg(examples_dir):
5064 f"The expected GeoPackage { gpkg_output_path } was not created."
5165 )
5266
53- # TODO: write some logic to compare the original and new GeoPackages.
67+ # Compare the original and new GeoPackages and check the number of rows
68+ # in the important tables.
69+ conn_original = sqlite3 .connect (temp_original_gpkg_path )
70+ conn_output = sqlite3 .connect (gpkg_output_path )
71+
72+ tables_original = conn_original .execute (
73+ "SELECT name FROM sqlite_master WHERE type='table';"
74+ ).fetchall ()
75+ conn_original .close ()
76+ tables_output = conn_output .execute (
77+ "SELECT name FROM sqlite_master WHERE type='table';"
78+ ).fetchall ()
79+ conn_output .close ()
80+
81+ assert tables_original == tables_output , (
82+ f"The original GeoPackage { temp_original_gpkg_path .name } and the output "
83+ f"GeoPackage { gpkg_output_path .name } have different tables:\n "
84+ f"Original: { tables_original } \n "
85+ f"Output: { tables_output } "
86+ )
87+
88+ important_tables = [
89+ {
90+ "table_name" : "Project" ,
91+ "no_rows" : 88 ,
92+ },
93+ {
94+ "table_name" : "Location" ,
95+ "no_rows" : 754 ,
96+ },
97+ {
98+ "table_name" : "Sample" ,
99+ "no_rows" : 17_774 ,
100+ },
101+ {
102+ "table_name" : "InSitu_GEOL" ,
103+ "no_rows" : 7_764 ,
104+ },
105+ {
106+ "table_name" : "InSitu_ISPT" ,
107+ "no_rows" : 3_986 ,
108+ },
109+ {
110+ "table_name" : "InSitu_WETH" ,
111+ "no_rows" : 3_928 ,
112+ },
113+ ]
114+ for table in important_tables :
115+ gdf_output = gpd .read_file (gpkg_output_path , layer = table ["table_name" ])
116+ assert len (gdf_output ) == table ["no_rows" ], (
117+ f"The output GeoPackage { gpkg_output_path .name } table { table ['table_name' ]} "
118+ f"has { len (gdf_output )} rows instead of { table ['no_rows' ]} ."
119+ )
120+ gdf_original = gpd .read_file (
121+ temp_original_gpkg_path , layer = table ["table_name" ]
122+ )
123+ pd .testing .assert_frame_equal (
124+ gdf_original , gdf_output , check_exact = False , rtol = 1e-5
125+ )
126+ # It's also possible to assert that GIS geometries are approximately equal.
127+ # However, when testing the equality of GeoDataFrames with pandas, the GIS
128+ # geometries are compared exactly, because each geometry is converted to a
129+ # WKT string and compared as a string. Therefore, if a less precise comparison
130+ # of GIS geometries is necessary, the assertion above needs changing too.
131+ # gpd.testing.assert_geoseries_equal(
132+ # gdf_original, gdf_output, check_less_precise=False
133+ # )
54134
55135 # Remove the newly generated kaitak_gi.gpkg
56136 os .remove (gpkg_output_path )
57137 # Place back the original kaitak_gi.gpkg from the temporary directory
58138 # to its original location.
59- shutil .move (temp_gpkg_path , gpkg_output_path )
139+ shutil .move (temp_original_gpkg_path , gpkg_output_path )
0 commit comments