Skip to content

Commit af1ab82

Browse files
committed
Update logic for determining object type
1 parent da7e71b commit af1ab82

File tree

3 files changed

+76
-33
lines changed

3 files changed

+76
-33
lines changed

pystac/serialization/identify.py

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -298,40 +298,59 @@ def _identify_stac_extensions(
298298
return list(stac_extensions)
299299

300300

301-
def identify_stac_object_type(json_dict: Dict[str, Any]) -> "STACObjectType_Type":
302-
"""Determines the STACObjectType of the provided JSON dict.
301+
def identify_stac_object_type(
302+
json_dict: Dict[str, Any]
303+
) -> Optional["STACObjectType_Type"]:
304+
"""Determines the STACObjectType of the provided JSON dict. If the JSON dict does
305+
not represent a STAC object, returns ``None``.
306+
307+
Will first try to identify the object using ``"type"`` field as described in the
308+
guidelines in :stac-spec:`How to Differentiate STAC Files
309+
<best-practices.md#how-to-differentiate-stac-files>`. If this fails, will fall back
310+
to using the pre-1.0 heuristic described in `this issue
311+
<https://github.com/radiantearth/stac-spec/issues/889#issuecomment-684529444>`__.
303312
304313
Args:
305-
json_dict : The dict of STAC JSON to identify.
306-
307-
Returns:
308-
STACObjectType: The object type represented by the JSON.
314+
json_dict : The dict of JSON to identify.
309315
"""
310-
object_type = None
311-
312-
if "type" in json_dict: # Try to identify using 'type' property
316+
# Try to identify using 'type' property, if present
317+
if "type" in json_dict:
318+
# Try to find 'type' property in known STACObjectType values
313319
for t in pystac.STACObjectType:
314320
if json_dict["type"].lower() == t.value.lower():
315-
object_type = t
316-
break
317-
318-
if object_type is None: # Use old-approach based on other properties
319-
# Identify pre-1.0 ITEMCOLLECTION (since removed)
320-
if "type" in json_dict and "assets" not in json_dict:
321-
if "stac_version" in json_dict and json_dict["stac_version"].startswith(
322-
"0"
323-
):
324-
if json_dict["type"] == "FeatureCollection":
325-
object_type = pystac.STACObjectType.ITEMCOLLECTION
326-
327-
if "extent" in json_dict:
328-
object_type = pystac.STACObjectType.COLLECTION
329-
elif "assets" in json_dict:
330-
object_type = pystac.STACObjectType.ITEM
321+
return t
322+
323+
obj_type = json_dict.get("type")
324+
325+
# For pre-1.0 objects of version 0.8.* or later, 'stac_version' must be present,
326+
# except in ItemCollections (which are handled in the else clause)
327+
if "stac_version" in json_dict:
328+
# Pre-1.0 STAC objects with 'type' == "Feature" are Items
329+
if obj_type == "Feature":
330+
return pystac.STACObjectType.ITEM
331+
# Pre-1.0 STAC objects with 'type' == "FeatureCollection" are ItemCollections
332+
if obj_type == "FeatureCollection":
333+
return pystac.STACObjectType.ITEMCOLLECTION
334+
# Anything else with a 'type' field is not a STAC object
335+
if obj_type is not None:
336+
return None
337+
338+
# Collections will contain either an 'extent' or a 'license' (or both)
339+
if "extent" in json_dict or "license" in json_dict:
340+
return pystac.STACObjectType.COLLECTION
341+
# Everything else that has a stac_version is a Catalog
331342
else:
332-
object_type = pystac.STACObjectType.CATALOG
333-
334-
return object_type
343+
return pystac.STACObjectType.CATALOG
344+
else:
345+
# Prior to STAC 0.9 ItemCollections did not have a stac_version field and could
346+
# only be identified by the fact that all of their 'features' are STAC Items
347+
if obj_type == "FeatureCollection":
348+
if all(
349+
identify_stac_object_type(feat) == pystac.STACObjectType.ITEM
350+
for feat in json_dict.get("features", [])
351+
):
352+
return pystac.STACObjectType.ITEMCOLLECTION
353+
return None
335354

336355

337356
def identify_stac_object(json_dict: Dict[str, Any]) -> STACJSONDescription:
@@ -346,6 +365,9 @@ def identify_stac_object(json_dict: Dict[str, Any]) -> STACJSONDescription:
346365
"""
347366
object_type = identify_stac_object_type(json_dict)
348367

368+
if object_type is None:
369+
raise pystac.STACTypeError("JSON does not represent a STAC object.")
370+
349371
version_range = STACVersionRange()
350372

351373
stac_version = json_dict.get("stac_version")

tests/serialization/test_identify.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,27 @@ def test_identify(self) -> None:
4747
set(actual.extensions), set(example.extensions), msg=msg
4848
)
4949

50+
def test_identify_non_stac_type(self) -> None:
51+
plain_feature_dict = {
52+
"type": "Feature",
53+
"properties": {},
54+
"geometry": {"type": "Point", "coordinates": [0, 0]},
55+
}
56+
57+
self.assertIsNone(identify_stac_object_type(plain_feature_dict))
58+
59+
def test_identify_non_stac_raises_error(self) -> None:
60+
plain_feature_dict = {
61+
"type": "Feature",
62+
"properties": {},
63+
"geometry": {"type": "Point", "coordinates": [0, 0]},
64+
}
65+
66+
with self.assertRaises(pystac.STACTypeError) as ctx:
67+
identify_stac_object(plain_feature_dict)
68+
69+
self.assertIn("JSON does not represent a STAC object", str(ctx.exception))
70+
5071

5172
class VersionTest(unittest.TestCase):
5273
def test_version_ordering(self) -> None:

tests/test_catalog.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -795,19 +795,19 @@ def test_full_copy_and_normalize_works_with_created_stac(self) -> None:
795795
def test_extra_fields(self) -> None:
796796
catalog = TestCases.test_case_1()
797797

798-
catalog.extra_fields["type"] = "FeatureCollection"
798+
catalog.extra_fields["custom_field"] = "Special content"
799799

800800
with get_temp_dir() as tmp_dir:
801801
p = os.path.join(tmp_dir, "catalog.json")
802802
catalog.save_object(include_self_link=False, dest_href=p)
803803
with open(p) as f:
804804
cat_json = json.load(f)
805-
self.assertTrue("type" in cat_json)
806-
self.assertEqual(cat_json["type"], "FeatureCollection")
805+
self.assertTrue("custom_field" in cat_json)
806+
self.assertEqual(cat_json["custom_field"], "Special content")
807807

808808
read_cat = pystac.Catalog.from_file(p)
809-
self.assertTrue("type" in read_cat.extra_fields)
810-
self.assertEqual(read_cat.extra_fields["type"], "FeatureCollection")
809+
self.assertTrue("custom_field" in read_cat.extra_fields)
810+
self.assertEqual(read_cat.extra_fields["custom_field"], "Special content")
811811

812812
def test_validate_all(self) -> None:
813813
for cat in TestCases.all_test_catalogs():

0 commit comments

Comments
 (0)