Skip to content

Commit 7e7d070

Browse files
authored
get_root_link/get_child_links/get_item_links: Ensure correct media types (#1497)
* `get_root|child|item_links` should be json-like * Allow media_type on get_links and get_single_link to be an iterable * Use `application/json`, `application/geo+json` or None as the media_type for `get_root_link`, `get_child_links` and `get_item_links` * Dry up
1 parent c033b51 commit 7e7d070

File tree

5 files changed

+86
-21
lines changed

5 files changed

+86
-21
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
- Top-level `item_assets` dict on `Collection`s ([#1476](https://github.com/stac-utils/pystac/pull/1476))
88
- Render Extension ([#1465](https://github.com/stac-utils/pystac/pull/1465))
9+
- Filter links by a list of media types
910

1011
### Changed
1112

@@ -16,6 +17,7 @@
1617
- Update Projection Extension to version 2 - proj:epsg -> proj:code ([#1287](https://github.com/stac-utils/pystac/pull/1287))
1718
- Update migrate code to handle license changes in STAC spec 1.1.0 ([#1491](https://github.com/stac-utils/pystac/pull/1491))
1819
- Allow links to have `file://` prefix - but don't write them that way by default ([#1489](https://github.com/stac-utils/pystac/pull/1489))
20+
- For `get_root_link`, `get_child_links`, `get_item_links`: Ensure json media types ([#1497](https://github.com/stac-utils/pystac/pull/1497))
1921
- Raise `STACError` with message when a link is expected to resolve to a STAC object but doesn't ([#1500](https://github.com/stac-utils/pystac/pull/1500))
2022
- Raise an error on APILayoutStrategy when root_href is non-url ([#1498](https://github.com/stac-utils/pystac/pull/1498))
2123

pystac/catalog.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
)
1414

1515
import pystac
16+
import pystac.media_type
1617
from pystac.cache import ResolvedObjectCache
1718
from pystac.errors import STACError, STACTypeError
1819
from pystac.layout import (
@@ -466,7 +467,10 @@ def get_child_links(self) -> list[Link]:
466467
Return:
467468
List[Link]: List of links of this catalog with ``rel == 'child'``
468469
"""
469-
return self.get_links(pystac.RelType.CHILD)
470+
return self.get_links(
471+
rel=pystac.RelType.CHILD,
472+
media_type=pystac.media_type.STAC_JSON,
473+
)
470474

471475
def clear_children(self) -> None:
472476
"""Removes all children from this catalog.
@@ -626,7 +630,9 @@ def get_item_links(self) -> list[Link]:
626630
Return:
627631
List[Link]: List of links of this catalog with ``rel == 'item'``
628632
"""
629-
return self.get_links(pystac.RelType.ITEM)
633+
return self.get_links(
634+
rel=pystac.RelType.ITEM, media_type=pystac.media_type.STAC_JSON
635+
)
630636

631637
def to_dict(
632638
self, include_self_link: bool = True, transform_hrefs: bool = True

pystac/media_type.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,7 @@ class MediaType(StringEnum):
2424
PDF = "application/pdf"
2525
ZARR = "application/vnd+zarr" # https://github.com/openMetadataInitiative/openMINDS_core/blob/v4/instances/data/contentTypes/zarr.jsonld
2626
NETCDF = "application/netcdf" # https://github.com/Unidata/netcdf/issues/42#issuecomment-1007618822
27+
28+
29+
#: Media types that can be resolved as STAC Objects
30+
STAC_JSON = [None, MediaType.GEOJSON, MediaType.JSON]

pystac/stac_object.py

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,7 @@
33
from abc import ABC, abstractmethod
44
from collections.abc import Callable, Iterable
55
from html import escape
6-
from typing import (
7-
TYPE_CHECKING,
8-
Any,
9-
TypeVar,
10-
cast,
11-
)
6+
from typing import TYPE_CHECKING, Any, TypeAlias, TypeVar, cast
127

138
import pystac
149
from pystac import STACError
@@ -27,6 +22,8 @@
2722

2823
S = TypeVar("S", bound="STACObject")
2924

25+
OptionalMediaType: TypeAlias = str | pystac.MediaType | None
26+
3027

3128
class STACObjectType(StringEnum):
3229
CATALOG = "Catalog"
@@ -177,7 +174,7 @@ def traverse(obj: str | STACObject, visited: set[str | STACObject]) -> bool:
177174
def get_single_link(
178175
self,
179176
rel: str | pystac.RelType | None = None,
180-
media_type: str | pystac.MediaType | None = None,
177+
media_type: OptionalMediaType | Iterable[OptionalMediaType] = None,
181178
) -> Link | None:
182179
"""Get a single :class:`~pystac.Link` instance associated with this
183180
object.
@@ -186,7 +183,8 @@ def get_single_link(
186183
rel : If set, filter links such that only those
187184
matching this relationship are returned.
188185
media_type: If set, filter the links such that only
189-
those matching media_type are returned
186+
those matching media_type are returned. media_type can
187+
be a single value or a list of values.
190188
191189
Returns:
192190
:class:`~pystac.Link` | None: First link that matches ``rel``
@@ -195,28 +193,31 @@ def get_single_link(
195193
"""
196194
if rel is None and media_type is None:
197195
return next(iter(self.links), None)
196+
if media_type and isinstance(media_type, (str, pystac.MediaType)):
197+
media_type = [media_type]
198198
return next(
199199
(
200200
link
201201
for link in self.links
202202
if (rel is None or link.rel == rel)
203-
and (media_type is None or link.media_type == media_type)
203+
and (media_type is None or link.media_type in media_type)
204204
),
205205
None,
206206
)
207207

208208
def get_links(
209209
self,
210210
rel: str | pystac.RelType | None = None,
211-
media_type: str | pystac.MediaType | None = None,
211+
media_type: OptionalMediaType | Iterable[OptionalMediaType] = None,
212212
) -> list[Link]:
213213
"""Gets the :class:`~pystac.Link` instances associated with this object.
214214
215215
Args:
216216
rel : If set, filter links such that only those
217217
matching this relationship are returned.
218218
media_type: If set, filter the links such that only
219-
those matching media_type are returned
219+
those matching media_type are returned. media_type can
220+
be a single value or a list of values.
220221
221222
Returns:
222223
List[:class:`~pystac.Link`]: A list of links that match ``rel`` and/
@@ -225,13 +226,14 @@ def get_links(
225226
"""
226227
if rel is None and media_type is None:
227228
return self.links
228-
else:
229-
return [
230-
link
231-
for link in self.links
232-
if (rel is None or link.rel == rel)
233-
and (media_type is None or link.media_type == media_type)
234-
]
229+
if media_type and isinstance(media_type, (str, pystac.MediaType)):
230+
media_type = [media_type]
231+
return [
232+
link
233+
for link in self.links
234+
if (rel is None or link.rel == rel)
235+
and (media_type is None or link.media_type in media_type)
236+
]
235237

236238
def clear_links(self, rel: str | pystac.RelType | None = None) -> None:
237239
"""Clears all :class:`~pystac.Link` instances associated with this object.
@@ -252,7 +254,10 @@ def get_root_link(self) -> Link | None:
252254
:class:`~pystac.Link` or None: The root link for this object,
253255
or ``None`` if no root link is set.
254256
"""
255-
return self.get_single_link(pystac.RelType.ROOT)
257+
return self.get_single_link(
258+
rel=pystac.RelType.ROOT,
259+
media_type=pystac.media_type.STAC_JSON,
260+
)
256261

257262
@property
258263
def self_href(self) -> str:

tests/test_catalog.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,10 @@ def test_get_links(self) -> None:
14091409
len(catalog.get_links(rel="search", media_type="application/geo+json")) == 1
14101410
)
14111411
assert len(catalog.get_links(media_type="text/html")) == 1
1412+
assert (
1413+
len(catalog.get_links(media_type=["text/html", "application/geo+json"]))
1414+
== 2
1415+
)
14121416
assert len(catalog.get_links(rel="search")) == 2
14131417
assert len(catalog.get_links(rel="via")) == 0
14141418
assert len(catalog.get_links()) == 6
@@ -1982,3 +1986,47 @@ def test_APILayoutStrategy_requires_root_to_be_url(
19821986
match="When using APILayoutStrategy the root_href must be a URL",
19831987
):
19841988
catalog.normalize_hrefs(root_href="issues-1486", strategy=APILayoutStrategy())
1989+
1990+
1991+
def test_get_child_links_cares_about_media_type(catalog: pystac.Catalog) -> None:
1992+
catalog.links.extend(
1993+
[
1994+
pystac.Link(
1995+
rel="child", target="./child-1.json", media_type="application/json"
1996+
),
1997+
pystac.Link(
1998+
rel="child", target="./child-2.json", media_type="application/geo+json"
1999+
),
2000+
pystac.Link(rel="child", target="./child-3.json"),
2001+
# this one won't get counted since it's the wrong media_type
2002+
pystac.Link(rel="child", target="./child.html", media_type="text/html"),
2003+
]
2004+
)
2005+
2006+
assert len(catalog.get_child_links()) == 3
2007+
2008+
2009+
def test_get_item_links_cares_about_media_type(catalog: pystac.Catalog) -> None:
2010+
catalog.links.extend(
2011+
[
2012+
pystac.Link(
2013+
rel="item", target="./item-1.json", media_type="application/json"
2014+
),
2015+
pystac.Link(
2016+
rel="item", target="./item-2.json", media_type="application/geo+json"
2017+
),
2018+
pystac.Link(rel="item", target="./item-3.json"),
2019+
# this one won't get counted since it's the wrong media_type
2020+
pystac.Link(rel="item", target="./item.html", media_type="text/html"),
2021+
]
2022+
)
2023+
2024+
assert len(catalog.get_item_links()) == 3
2025+
2026+
2027+
def test_get_root_link_cares_about_media_type(catalog: pystac.Catalog) -> None:
2028+
catalog.links.insert(
2029+
0, pystac.Link(rel="root", target="./self.json", media_type="text/html")
2030+
)
2031+
root_link = catalog.get_root_link()
2032+
assert root_link and root_link.target != "./self.json"

0 commit comments

Comments
 (0)