Skip to content

Commit 8789513

Browse files
author
Jon Duckworth
authored
Merge pull request #402 from duckontheweb/340-non-stac-error
Raise exception in identify_stac_object for non-STAC objects
2 parents eb15669 + 36303f6 commit 8789513

File tree

20 files changed

+128
-3293
lines changed

20 files changed

+128
-3293
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ stdout*
1010
.idea
1111
.vscode
1212

13+
1314
# Sphinx documentation
1415
.ipynb_checkpoints/
1516

CHANGELOG.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414

1515
### Fixed
1616

17-
- Fixed returned None by `EOExtension.get_bands` for asset without EO bands ([#406](https://github.com/stac-utils/pystac/pull/406))
17+
- `EOExtension.get_bands` returns `None` for asset without EO bands ([#406](https://github.com/stac-utils/pystac/pull/406))
18+
- `identify_stac_object_type` returns `None` and `identify_stac_object` raises `STACTypeError` for non-STAC objects
19+
([#402](https://github.com/stac-utils/pystac/pull/402))
1820

1921
### Removed
2022

pystac/extensions/eo.py

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
https://github.com/stac-extensions/eo
44
"""
55

6-
import re
76
from typing import (
87
Any,
98
Dict,
@@ -498,36 +497,6 @@ class EOExtensionHooks(ExtensionHooks):
498497
def migrate(
499498
self, obj: Dict[str, Any], version: STACVersionID, info: STACJSONDescription
500499
) -> None:
501-
if version < "0.5":
502-
if "eo:crs" in obj["properties"]:
503-
# Try to pull out the EPSG code.
504-
# Otherwise, just leave it alone.
505-
wkt = obj["properties"]["eo:crs"]
506-
matches = list(re.finditer(r'AUTHORITY\[[^\]]*\"(\d+)"\]', wkt))
507-
if len(matches) > 0:
508-
epsg_code = matches[-1].group(1)
509-
obj["properties"].pop("eo:crs")
510-
obj["properties"]["eo:epsg"] = int(epsg_code)
511-
512-
if version < "0.6":
513-
# Change eo:bands from a dict to a list. eo:bands on an asset
514-
# is an index instead of a dict key. eo:bands is in properties.
515-
bands_dict = obj["eo:bands"]
516-
keys_to_indices: Dict[str, int] = {}
517-
bands: List[Dict[str, Any]] = []
518-
for i, (k, band) in enumerate(bands_dict.items()):
519-
keys_to_indices[k] = i
520-
bands.append(band)
521-
522-
obj.pop("eo:bands")
523-
obj["properties"]["eo:bands"] = bands
524-
for k, asset in obj["assets"].items():
525-
if "eo:bands" in asset:
526-
asset_band_indices: List[int] = []
527-
for bk in asset["eo:bands"]:
528-
asset_band_indices.append(keys_to_indices[bk])
529-
asset["eo:bands"] = sorted(asset_band_indices)
530-
531500
if version < "0.9":
532501
# Some eo fields became common_metadata
533502
if (

pystac/serialization/identify.py

Lines changed: 54 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -185,107 +185,12 @@ def _identify_stac_extensions(
185185

186186
# checksum
187187
if "links" in d:
188-
found_checksum = False
189188
for link in d["links"]:
190-
# Account for old links as dicts
191-
if isinstance(link, str):
192-
link_props = cast(Dict[str, Any], d["links"][link]).keys()
193-
else:
194-
link_props = cast(Dict[str, Any], link).keys()
189+
link_props = cast(Dict[str, Any], link).keys()
195190

196191
if any(prop.startswith("checksum:") for prop in link_props):
197-
found_checksum = True
198192
stac_extensions.add(OldExtensionShortIDs.CHECKSUM.value)
199-
if not found_checksum:
200-
if "assets" in d:
201-
for asset in d["assets"].values():
202-
asset_props = cast(Dict[str, Any], asset).keys()
203-
if any(prop.startswith("checksum:") for prop in asset_props):
204-
found_checksum = True
205-
stac_extensions.add(OldExtensionShortIDs.CHECKSUM.value)
206-
if found_checksum:
207-
version_range.set_min(STACVersionID("0.6.2"))
208-
209-
# datacube
210-
if object_type == pystac.STACObjectType.ITEM:
211-
if any(k.startswith("cube:") for k in cast(Dict[str, Any], d["properties"])):
212-
stac_extensions.add(OldExtensionShortIDs.DATACUBE.value)
213-
version_range.set_min(STACVersionID("0.6.1"))
214-
215-
# datetime-range (old extension)
216-
if object_type == pystac.STACObjectType.ITEM:
217-
if "dtr:start_datetime" in d["properties"]:
218-
stac_extensions.add("datetime-range")
219-
version_range.set_min(STACVersionID("0.6.0"))
220-
221-
# eo
222-
if object_type == pystac.STACObjectType.ITEM:
223-
if any(k.startswith("eo:") for k in cast(Dict[str, Any], d["properties"])):
224-
stac_extensions.add(OldExtensionShortIDs.EO.value)
225-
if "eo:epsg" in d["properties"]:
226-
if d["properties"]["eo:epsg"] is None:
227-
version_range.set_min(STACVersionID("0.6.1"))
228-
if "eo:crs" in d["properties"]:
229-
version_range.set_max(STACVersionID("0.4.1"))
230-
if "eo:constellation" in d["properties"]:
231-
version_range.set_min(STACVersionID("0.6.0"))
232-
if "eo:bands" in d:
233-
stac_extensions.add(OldExtensionShortIDs.EO.value)
234-
version_range.set_max(STACVersionID("0.5.2"))
235-
236-
# pointcloud
237-
if object_type == pystac.STACObjectType.ITEM:
238-
if any(k.startswith("pc:") for k in cast(Dict[str, Any], d["properties"])):
239-
stac_extensions.add(OldExtensionShortIDs.POINTCLOUD.value)
240-
version_range.set_min(STACVersionID("0.6.2"))
241-
242-
# sar
243-
if object_type == pystac.STACObjectType.ITEM:
244-
if any(k.startswith("sar:") for k in cast(Dict[str, Any], d["properties"])):
245-
stac_extensions.add(OldExtensionShortIDs.SAR.value)
246-
version_range.set_min(STACVersionID("0.6.2"))
247-
if version_range.contains("0.6.2"):
248-
for prop in [
249-
"sar:absolute_orbit",
250-
"sar:resolution",
251-
"sar:pixel_spacing",
252-
"sar:looks",
253-
]:
254-
if prop in d["properties"]:
255-
if isinstance(d["properties"][prop], list):
256-
version_range.set_max(STACVersionID("0.6.2"))
257-
if version_range.contains("0.7.0"):
258-
for prop in [
259-
"sar:incidence_angle",
260-
"sar:relative_orbit",
261-
"sar:observation_direction",
262-
"sar:resolution_range",
263-
"sar:resolution_azimuth",
264-
"sar:pixel_spacing_range",
265-
"sar:pixel_spacing_azimuth",
266-
"sar:looks_range",
267-
"sar:looks_azimuth",
268-
"sar:looks_equivalent_number",
269-
]:
270-
if prop in d["properties"]:
271-
version_range.set_min(STACVersionID("0.7.0"))
272-
if "sar:absolute_orbit" in d["properties"] and not isinstance(
273-
d["properties"]["sar:absolute_orbit"], list
274-
):
275-
version_range.set_min(STACVersionID("0.7.0"))
276-
if "sar:off_nadir" in d["properties"]:
277-
version_range.set_max(STACVersionID("0.6.2"))
278-
279-
# scientific
280-
if (
281-
object_type == pystac.STACObjectType.ITEM
282-
or object_type == pystac.STACObjectType.COLLECTION
283-
):
284-
if "properties" in d:
285-
prop_keys = cast(Dict[str, Any], d["properties"]).keys()
286-
if any(k.startswith("sci:") for k in prop_keys):
287-
stac_extensions.add(OldExtensionShortIDs.SCIENTIFIC.value)
288-
version_range.set_min(STACVersionID("0.6.0"))
193+
version_range.set_min(STACVersionID("0.6.2"))
289194

290195
# Single File STAC
291196
if object_type == pystac.STACObjectType.ITEMCOLLECTION:
@@ -298,40 +203,59 @@ def _identify_stac_extensions(
298203
return list(stac_extensions)
299204

300205

301-
def identify_stac_object_type(json_dict: Dict[str, Any]) -> "STACObjectType_Type":
302-
"""Determines the STACObjectType of the provided JSON dict.
206+
def identify_stac_object_type(
207+
json_dict: Dict[str, Any]
208+
) -> Optional["STACObjectType_Type"]:
209+
"""Determines the STACObjectType of the provided JSON dict. If the JSON dict does
210+
not represent a STAC object, returns ``None``.
303211
304-
Args:
305-
json_dict : The dict of STAC JSON to identify.
212+
Will first try to identify the object using the ``"type"`` field as described in the
213+
guidelines in :stac-spec:`How to Differentiate STAC Files
214+
<best-practices.md#how-to-differentiate-stac-files>`. If this fails, will fall back
215+
to using the pre-1.0 heuristic described in `this issue
216+
<https://github.com/radiantearth/stac-spec/issues/889#issuecomment-684529444>`__.
306217
307-
Returns:
308-
STACObjectType: The object type represented by the JSON.
218+
Args:
219+
json_dict : The dict of JSON to identify.
309220
"""
310-
object_type = None
311-
312-
if "type" in json_dict: # Try to identify using 'type' property
221+
# Try to identify using 'type' property, if present
222+
if "type" in json_dict:
223+
# Try to find 'type' property in known STACObjectType values
313224
for t in pystac.STACObjectType:
314225
if json_dict["type"].lower() == t.value.lower():
315-
object_type = t
316-
break
317-
318-
if object_type is None: # Use old-approach based on other properties
319-
# Identify pre-1.0 ITEMCOLLECTION (since removed)
320-
if "type" in json_dict and "assets" not in json_dict:
321-
if "stac_version" in json_dict and json_dict["stac_version"].startswith(
322-
"0"
323-
):
324-
if json_dict["type"] == "FeatureCollection":
325-
object_type = pystac.STACObjectType.ITEMCOLLECTION
326-
327-
if "extent" in json_dict:
328-
object_type = pystac.STACObjectType.COLLECTION
329-
elif "assets" in json_dict:
330-
object_type = pystac.STACObjectType.ITEM
226+
return t
227+
228+
obj_type = json_dict.get("type")
229+
230+
# Pre-1.0 objects of version 0.8.* or later must have a 'stac_version' field,
231+
# except in ItemCollections (which are handled in the else clause)
232+
if "stac_version" in json_dict:
233+
# Pre-1.0 STAC objects with 'type' == "Feature" are Items
234+
if obj_type == "Feature":
235+
return pystac.STACObjectType.ITEM
236+
# Pre-1.0 STAC objects with 'type' == "FeatureCollection" are ItemCollections
237+
if obj_type == "FeatureCollection":
238+
return pystac.STACObjectType.ITEMCOLLECTION
239+
# Anything else with a 'type' field is not a STAC object
240+
if obj_type is not None:
241+
return None
242+
243+
# Collections will contain either an 'extent' or a 'license' (or both)
244+
if "extent" in json_dict or "license" in json_dict:
245+
return pystac.STACObjectType.COLLECTION
246+
# Everything else that has a stac_version is a Catalog
331247
else:
332-
object_type = pystac.STACObjectType.CATALOG
333-
334-
return object_type
248+
return pystac.STACObjectType.CATALOG
249+
else:
250+
# Prior to STAC 0.9 ItemCollections did not have a stac_version field and could
251+
# only be identified by the fact that all of their 'features' are STAC Items
252+
if obj_type == "FeatureCollection":
253+
if all(
254+
identify_stac_object_type(feat) == pystac.STACObjectType.ITEM
255+
for feat in json_dict.get("features", [])
256+
):
257+
return pystac.STACObjectType.ITEMCOLLECTION
258+
return None
335259

336260

337261
def identify_stac_object(json_dict: Dict[str, Any]) -> STACJSONDescription:
@@ -346,21 +270,16 @@ def identify_stac_object(json_dict: Dict[str, Any]) -> STACJSONDescription:
346270
"""
347271
object_type = identify_stac_object_type(json_dict)
348272

273+
if object_type is None:
274+
raise pystac.STACTypeError("JSON does not represent a STAC object.")
275+
349276
version_range = STACVersionRange()
350277

351278
stac_version = json_dict.get("stac_version")
352279
stac_extensions = json_dict.get("stac_extensions", None)
353280

354281
if stac_version is None:
355-
if (
356-
object_type == pystac.STACObjectType.CATALOG
357-
or object_type == pystac.STACObjectType.COLLECTION
358-
):
359-
version_range.set_max(STACVersionID("0.5.2"))
360-
elif object_type == pystac.STACObjectType.ITEM:
361-
version_range.set_max(STACVersionID("0.7.0"))
362-
else: # ItemCollection
363-
version_range.set_min(STACVersionID("0.8.0"))
282+
version_range.set_min(STACVersionID("0.8.0"))
364283
else:
365284
version_range.set_to_single(stac_version)
366285

@@ -372,7 +291,7 @@ def identify_stac_object(json_dict: Dict[str, Any]) -> STACJSONDescription:
372291
# if the stac_extensions property doesn't exist for everything
373292
# but ItemCollection (except after 0.9.0, when ItemCollection also got
374293
# the stac_extensions property).
375-
if version_range.is_earlier_than("0.8.0") or (
294+
if (
376295
object_type == pystac.STACObjectType.ITEMCOLLECTION
377296
and not version_range.is_later_than("0.8.1")
378297
):
@@ -390,21 +309,4 @@ def identify_stac_object(json_dict: Dict[str, Any]) -> STACJSONDescription:
390309
# code translates the short name IDs used pre-1.0.0-RC1 to the
391310
# relevant extension schema uri identifier.
392311

393-
if not version_range.is_single_version():
394-
# Final Checks
395-
396-
if "links" in json_dict:
397-
# links were a dictionary only in 0.5
398-
if "links" in json_dict and isinstance(json_dict["links"], dict):
399-
version_range.set_to_single(STACVersionID("0.5.2"))
400-
401-
# self links became non-required in 0.7.0
402-
if not version_range.is_earlier_than("0.7.0") and not any(
403-
filter(
404-
lambda l: cast(Dict[str, Any], l)["rel"] == pystac.RelType.SELF,
405-
json_dict["links"],
406-
)
407-
):
408-
version_range.set_min(STACVersionID("0.7.0"))
409-
410312
return STACJSONDescription(object_type, version_range, set(stac_extensions))

pystac/serialization/migrate.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,9 @@
1313
from pystac import STACObjectType as STACObjectType_Type
1414

1515

16-
def _migrate_links(d: Dict[str, Any], version: STACVersionID) -> None:
17-
if version < "0.6":
18-
if "links" in d:
19-
if isinstance(d["links"], dict):
20-
d["links"] = list(d["links"].values())
21-
22-
2316
def _migrate_catalog(
2417
d: Dict[str, Any], version: STACVersionID, info: STACJSONDescription
2518
) -> None:
26-
_migrate_links(d, version)
27-
2819
if version < "0.8":
2920
d["stac_extensions"] = list(info.extensions)
3021

@@ -38,10 +29,8 @@ def _migrate_collection(
3829
def _migrate_item(
3930
d: Dict[str, Any], version: STACVersionID, info: STACJSONDescription
4031
) -> None:
41-
_migrate_links(d, version)
42-
43-
if version < "0.8":
44-
d["stac_extensions"] = list(info.extensions)
32+
# No migrations necessary for supported STAC versions (>=0.8)
33+
pass
4534

4635

4736
def _migrate_itemcollection(

0 commit comments

Comments
 (0)