Skip to content

Commit 177c1ea

Browse files
committed
feat: add collection validation
Includes: - Python dependencies file and instructions - CI - Actual fixes
1 parent 2a6b114 commit 177c1ea

17 files changed

+190
-50
lines changed

.github/workflows/ci.yaml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
name: Continuous integration
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
9+
jobs:
10+
validate:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- name: Checkout
14+
uses: actions/checkout@v4
15+
- name: Setup Python
16+
uses: actions/setup-python@v4
17+
with:
18+
python-version: "3.11"
19+
cache: "pip"
20+
- name: Install dependencies
21+
run: pip install -r requirements.txt
22+
- name: Validate collections
23+
run: python scripts/validate_collections.py

README.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,35 @@ Should follow the following format:
105105
"dry_run": "<true/false>",
106106
}
107107
```
108+
109+
## Validation
110+
111+
This repository provides a script for validating all collections.
112+
First, install the requirements (preferably in a virtual environment):
113+
114+
```shell
115+
pip install -r requirements.txt
116+
```
117+
118+
Then:
119+
120+
```shell
121+
python scripts/validate_collections.py
122+
```
123+
124+
## Development
125+
126+
If you need to add new dependencies, first install the requirements:
127+
128+
```shell
129+
pip install -r requirements.txt
130+
```
131+
132+
Add your dependency to `requirements.in` *without a version specifier* (unless you really need one).
133+
Then run:
134+
135+
```shell
136+
pip-compile
137+
```
138+
139+
This will update `requirements.txt` with a complete, fully pinned set of Python dependencies.

ingestion-data/collections/caldor-fire-behavior.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
"spatial":{
99
"bbox":[
1010
[
11-
"-180",
12-
"90",
13-
"-90",
14-
"180"
11+
-180,
12+
90,
13+
-90,
14+
180
1515
]
1616
]
1717
},

ingestion-data/collections/caldor-fire-burn-severity.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
"spatial":{
99
"bbox":[
1010
[
11-
"-180",
12-
"90",
13-
"-90",
14-
"180"
11+
-180,
12+
90,
13+
-90,
14+
180
1515
]
1616
]
1717
},

ingestion-data/collections/disturbance-probability.json

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,26 @@
11
{
2-
"collection": "disturbance-probability-percentile",
2+
"type": "Collection",
3+
"stac_version": "1.0.0",
4+
"id": "disturbance-probability-percentile",
35
"title": "Near Real-time Disturbance probability map (%)",
46
"data_type": "cog",
5-
"spatial_extent": {
6-
"xmin": -84.132,
7-
"ymin": 25.224,
8-
"xmax": -79.853,
9-
"ymax": 30.728
10-
},
11-
"temporal_extent": {
12-
"startdate": "2022-10-03T00:00:00Z",
13-
"enddate": "2022-10-03T23:59:59Z"
7+
"extent": {
8+
"spatial": {
9+
"bbox": [
10+
-84.132,
11+
25.224,
12+
-79.853,
13+
30.728
14+
]
15+
},
16+
"temporal": {
17+
"interval": [
18+
[
19+
"2022-10-03T00:00:00Z",
20+
"2022-10-03T23:59:59Z"
21+
]
22+
]
23+
}
1424
},
1525
"license": "CC-BY-NC-SA-1.0",
1626
"providers": [
@@ -52,5 +62,6 @@
5262
"bucket": "veda-data-store-staging",
5363
"filename_regex": "(.*)spec_prob_mosaic_2022-10-03_day.tif$"
5464
}
55-
]
65+
],
66+
"links": []
5667
}

ingestion-data/collections/ecco-surface-height-change.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
"spatial":{
99
"bbox":[
1010
[
11-
"-180",
12-
"90",
13-
"-90",
14-
"180"
11+
-180,
12+
90,
13+
-90,
14+
180
1515
]
1616
]
1717
},

ingestion-data/collections/lis-tws-nonstationarity-index.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
]
2121
}
2222
},
23-
"license": "Creative Commons Zero (CC0-1.0)",
23+
"license": "CC0-1.0",
2424
"description": "The global Terrestrial Water Storage (TWS) non-stationarity index integrates the trend, seasonal shifts, and variability change of TWS for the period of 2003 - 2020. TWS is derived by jointly assimilating the MODIS Leaf Area Index, the ESA CCI surface soil moisture, and the GSFC GRACE mascon-based TWS anomalies into the Noah-MP land surface model within the NASA Land Information System (LIS) at 10 km spatial resolution forced by the combination of MERRA2 and IMERG meteorological fields. The smaller the non-stationarity index is, the more the water cycle is under a non-stationary process. Glaciers and Greenland are excluded from the analysis.",
2525
"item_assets": {
2626
"cog_default": {

ingestion-data/collections/modis-fire-anomalies-diff-covid-19-changing-landscape.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
"spatial":{
99
"bbox":[
1010
[
11-
"-125.0",
12-
"24.0",
13-
"-75.0",
14-
"43.0"
11+
-125.0,
12+
24.0,
13+
-75.0,
14+
43.0
1515
]
1616
]
1717
},

ingestion-data/collections/mtbs-burn-severity.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
"spatial":{
99
"bbox":[
1010
[
11-
"-126.49459612498832",
12-
"24.0478678762251",
13-
"-71.50752568733597",
14-
"50.55916724898132"
11+
-126.494596,
12+
24.047867,
13+
-71.507525,
14+
50.559167
1515
]
1616
]
1717
},

ingestion-data/collections/nceo-africa-2017.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
"title": "NCEO Africa Aboveground Woody Biomass 2017",
1313
"extent": {
1414
"spatial": {
15-
" bbox": [[-18.2735295, -35.054059, 51.8642329, 37.7310386]]
15+
"bbox": [[-18.2735295, -35.054059, 51.8642329, 37.7310386]]
1616
},
1717
"temporal": {
1818
"interval": [["2017-01-01T00:00:00Z", "2018-01-01T00:00:00Z"]]

ingestion-data/collections/pzd-anomaly-covid-19.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,18 @@
88
"spatial":{
99
"bbox":[
1010
[
11-
"-92.2",
12-
"40.9975",
13-
"-76.0",
14-
"49.09"
11+
-92.2,
12+
40.9975,
13+
-76.0,
14+
49.09
1515
]
1616
]
1717
},
1818
"temporal":{
1919
"interval":[
2020
[
2121
"2020-01-01T00:00:00Z",
22-
"2021-23-31T23:59:59Z"
22+
"2021-12-31T23:59:59Z"
2323
]
2424
]
2525
}

ingestion-data/collections/recovery-proxy-maps-covid-19.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
"spatial":{
99
"bbox":[
1010
[
11-
"-180",
12-
"90",
13-
"-90",
14-
"180"
11+
-180,
12+
90,
13+
-90,
14+
180
1515
]
1616
]
1717
},

ingestion-data/collections/slowdown-proxy-covid-19-changing-landscapes.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
"spatial":{
99
"bbox":[
1010
[
11-
"-180",
12-
"90",
13-
"-90",
14-
"180"
11+
-180,
12+
90,
13+
-90,
14+
180
1515
]
1616
]
1717
},

ingestion-data/collections/togo-agriculture-covid-19.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
"spatial":{
99
"bbox":[
1010
[
11-
"-0.14",
12-
"6.10",
13-
"1.80",
14-
"11.13"
11+
-0.14,
12+
6.10,
13+
1.80,
14+
11.13
1515
]
1616
]
1717
},

requirements.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pip-tools
2+
pystac[validation]

requirements.txt

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#
2+
# This file is autogenerated by pip-compile with Python 3.11
3+
# by the following command:
4+
#
5+
# pip-compile
6+
#
7+
attrs==23.1.0
8+
# via jsonschema
9+
build==1.0.3
10+
# via pip-tools
11+
click==8.1.7
12+
# via pip-tools
13+
jsonschema==4.17.3
14+
# via pystac
15+
packaging==23.1
16+
# via build
17+
pip-tools==7.3.0
18+
# via -r requirements.in
19+
pyproject-hooks==1.0.0
20+
# via build
21+
pyrsistent==0.19.3
22+
# via jsonschema
23+
pystac[validation]==1.8.3
24+
# via -r requirements.in
25+
python-dateutil==2.8.2
26+
# via pystac
27+
six==1.16.0
28+
# via python-dateutil
29+
wheel==0.41.2
30+
# via pip-tools
31+
32+
# The following packages are considered to be unsafe in a requirements file:
33+
# pip
34+
# setuptools

scripts/validate_collections.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/usr/bin/env python3
2+
3+
"""Validates all collections in ingestion-data/collections"""
4+
5+
import json
6+
import sys
7+
from pathlib import Path
8+
9+
from pystac import Collection, STACValidationError
10+
11+
root = Path(__file__).parents[1]
collections = root / "ingestion-data" / "collections"


def _check_collection(path: Path) -> "dict[str, object] | None":
    """Load and validate a single collection file.

    Returns ``None`` when the file can be read as a STAC collection and passes
    validation. Otherwise returns a report dict with two keys:

    * ``"type"``: ``"error"`` if the file could not be read as a collection,
      ``"invalid"`` if it was read but failed STAC validation.
    * ``"message"``: a string, or a list of strings when the validation error
      carries a list of underlying errors.
    """
    try:
        collection = Collection.from_file(str(path))
    except Exception as error:
        # Deliberately broad: whatever prevents loading (bad JSON, missing
        # required fields, I/O problems) is reported for this file so that the
        # remaining files are still checked.
        return {
            "type": "error",
            "message": f"cannot read collection, {type(error)}: {error}",
        }
    try:
        collection.validate()
    except STACValidationError as error:
        if isinstance(error.source, list):
            message = [str(e) for e in error.source]
        else:
            message = str(error.source)
        return {
            "type": "invalid",
            "message": message,
        }
    return None


def main(directory: Path = collections) -> int:
    """Validate every ``*.json`` file under *directory* (recursively).

    Problems are written to stdout as a JSON object mapping each offending
    file (path relative to *directory*, POSIX separators) to its report.

    Returns the process exit status: ``0`` when every file is valid, ``1``
    when at least one file has a problem or *directory* does not exist.
    """
    if not directory.is_dir():
        # rglob() on a missing directory yields nothing, which would otherwise
        # look like a fully successful validation run.
        print(f"collections directory not found: {directory}", file=sys.stderr)
        return 1

    errors = {}
    # Sorted so that the report is reproducible across filesystems.
    for path in sorted(directory.rglob("*.json")):
        report = _check_collection(path)
        if report is not None:
            # The relative path (not just the file name) keeps same-named
            # files in different subdirectories from overwriting each other.
            errors[path.relative_to(directory).as_posix()] = report

    if errors:
        json.dump(errors, sys.stdout, indent=2)
        sys.stdout.write("\n")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)