Skip to content

Commit d0c6cc5

Browse files
committed
Refactor STAC_IO to StacIO
1 parent f6e1716 commit d0c6cc5

21 files changed

+324
-194
lines changed

pystac/__init__.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from typing import Any, Dict, Optional
99
from pystac.version import (__version__, get_stac_version, set_stac_version) # type:ignore
10-
from pystac.stac_io import STAC_IO # type:ignore
10+
from pystac.stac_io import StacIO # type:ignore
1111
from pystac.stac_object import (STACObject, STACObjectType) # type:ignore
1212
from pystac.media_type import MediaType # type:ignore
1313
from pystac.link import (Link, HIERARCHICAL_LINKS) # type:ignore
@@ -99,7 +99,8 @@ def write_file(obj: STACObject,
9999

100100
def read_dict(d: Dict[str, Any],
101101
href: Optional[str] = None,
102-
root: Optional[Catalog] = None) -> STACObject:
102+
root: Optional[Catalog] = None,
103+
stac_io: Optional[StacIO] = None) -> STACObject:
103104
"""Reads a STAC object from a dict representing the serialized JSON version of the
104105
STAC object.
105106
@@ -115,5 +116,9 @@ def read_dict(d: Dict[str, Any],
115116
root (Catalog or Collection): Optional root of the catalog for this object.
116117
If provided, the root's resolved object cache can be used to search for
117118
previously resolved instances of the STAC object.
119+
stac_io: Optional StacIO instance to use for reading. If None, the
120+
default instance will be used.
118121
"""
119-
return STAC_IO.stac_object_from_dict(d, href, root)
122+
if stac_io is None:
123+
stac_io = StacIO.default()
124+
return stac_io.stac_object_from_dict(d, href, root)

pystac/catalog.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,12 @@ class Catalog(STACObject):
110110

111111
STAC_OBJECT_TYPE = ps.STACObjectType.CATALOG
112112

113+
_stac_io: Optional[ps.StacIO] = None
114+
"""Optional instance of StacIO that will be used by default
115+
for any IO operations on objects contained by this catalog.
116+
This is set when a catalog
117+
is read by a StacIO instance."""
118+
113119
DEFAULT_FILE_NAME = "catalog.json"
114120
"""Default file name that will be given to this STAC object in a canonical format."""
115121
def __init__(self,

pystac/link.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from typing import Any, Dict, Optional, TYPE_CHECKING, Union, cast
33

44
import pystac as ps
5-
from pystac.stac_io import STAC_IO
65
from pystac.utils import (make_absolute_href, make_relative_href, is_absolute_href)
76

87
if TYPE_CHECKING:
@@ -176,11 +175,22 @@ def resolve_stac_object(self, root: Optional["Catalog_Type"] = None) -> "Link":
176175
target_href = make_absolute_href(target_href, start_href)
177176
obj = None
178177

178+
stac_io: Optional[ps.StacIO] = None
179+
179180
if root is not None:
180181
obj = root._resolved_objects.get_by_href(target_href)
182+
stac_io = root._stac_io
181183

182184
if obj is None:
183-
obj = STAC_IO.read_stac_object(target_href, root=root)
185+
186+
if stac_io is None:
187+
if self.owner is not None:
188+
if isinstance(self.owner, ps.Catalog):
189+
stac_io = self.owner._stac_io
190+
if stac_io is None:
191+
stac_io = ps.StacIO.default()
192+
193+
obj = stac_io.read_stac_object(target_href, root=root)
184194
obj.set_self_href(target_href)
185195
if root is not None:
186196
obj = root._resolved_objects.get_or_cache(obj)

pystac/serialization/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ def stac_object_from_dict(d: Dict[str, Any],
2727
If provided, the root's resolved object cache can be used to search for
2828
previously resolved instances of the STAC object.
2929
30-
Note: This is used internally in STAC_IO to deserialize STAC Objects.
31-
It is in the top level __init__ in order to avoid circular dependencies.
30+
Note: This is used internally in StacIO instances to deserialize STAC Objects.
3231
"""
3332
if identify_stac_object_type(d) == ps.STACObjectType.ITEM:
3433
collection_cache = None

pystac/serialization/common_properties.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ def merge_common_properties(item_dict: Dict[str, Any],
1111
json_href: Optional[str] = None) -> bool:
1212
"""Merges Collection properties into an Item.
1313
14+
Note: This is only applicable to reading old STAC versions (pre 1.0.0-beta.1).
15+
1416
Args:
1517
item_dict (dict): JSON dict of the Item which properties should be merged
1618
into.
@@ -70,7 +72,7 @@ def merge_common_properties(item_dict: Dict[str, Any],
7072
collection = collection_cache.get_by_href(collection_href)
7173

7274
if collection is None:
73-
collection = ps.STAC_IO.read_json(collection_href)
75+
collection = ps.StacIO.default().read_json(collection_href)
7476

7577
if collection is not None:
7678
collection_id = None

pystac/stac_io.py

Lines changed: 200 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,231 @@
1+
from abc import ABC, abstractmethod
12
import os
23
import json
3-
from typing import Any, Callable, Dict, Optional, TYPE_CHECKING
4+
from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING, Tuple, Type, Union
45

56
from urllib.parse import urlparse
67
from urllib.request import urlopen
78
from urllib.error import HTTPError
89

10+
import pystac as ps
911
import pystac.serialization
1012

13+
# Use orjson if available
14+
try:
15+
import orjson
16+
except ImportError:
17+
orjson = None
18+
1119
if TYPE_CHECKING:
1220
from pystac.stac_object import STACObject as STACObject_Type
1321
from pystac.catalog import Catalog as Catalog_Type
22+
from pystac.link import Link as Link_Type
1423

1524

16-
class STAC_IO:
17-
"""Methods used to read and save STAC json.
18-
Allows users of the library to set their own methods
19-
(e.g. for reading and writing from cloud storage)
20-
"""
21-
@staticmethod
22-
def default_read_text_method(uri: str) -> str:
23-
"""Default method for reading text. Only handles local file paths."""
24-
parsed = urlparse(uri)
25+
class StacIO(ABC):
26+
_default_io: Optional[Type["StacIO"]] = None
27+
28+
@abstractmethod
29+
def read_text(self, source: Union[str, "Link_Type"], *args: Any, **kwargs: Any) -> str:
30+
"""Read text from the given URI.
31+
32+
The source to read from can be specified
33+
as a string or a Link. If it's a string, it's the URL of the HREF from which to
34+
read. When reading links, PySTAC will pass in the entire link body.
35+
This enables implementations to utilize additional link information,
36+
e.g. the "post" information in a pagination link from a STAC API search.
37+
38+
Args:
39+
source (str or pystac.Link): The source to read from.
40+
41+
Returns:
42+
str: The text contained in the file at the location specified by the uri.
43+
"""
44+
raise NotImplementedError("read_text not implemented")
45+
46+
@abstractmethod
47+
def write_text(self, dest: Union[str, "Link_Type"], txt: str, *args: Any,
48+
**kwargs: Any) -> None:
49+
"""Write the given text to a file at the given URI.
50+
51+
The destination to write to can be specified
52+
as a string or a Link. If it's a string, it's the URL of the HREF to which to
53+
write. When writing based on links, PySTAC will pass in the entire link body.
54+
55+
Args:
56+
dest (str or pystac.Link): The destination to write to.
57+
txt (str): The text to write.
58+
"""
59+
raise NotImplementedError("write_text not implemented")
60+
61+
def _json_loads(self, txt: str, source: Union[str, "Link_Type"]) -> Dict[str, Any]:
62+
if orjson is not None:
63+
return orjson.loads(txt)
64+
else:
65+
return json.loads(self.read_text(txt))
66+
67+
def _json_dumps(self, json_dict: Dict[str, Any], source: Union[str, "Link_Type"]) -> str:
68+
if orjson is not None:
69+
return orjson.dumps(json_dict, option=orjson.OPT_INDENT_2).decode('utf-8')
70+
else:
71+
return json.dumps(json_dict, indent=2)
72+
73+
def stac_object_from_dict(self,
74+
d: Dict[str, Any],
75+
href: Optional[str] = None,
76+
root: Optional["Catalog_Type"] = None) -> "STACObject_Type":
77+
result = pystac.serialization.stac_object_from_dict(d, href, root)
78+
if isinstance(result, ps.Catalog):
79+
# Set the stac_io instance for usage by io operations
80+
# where this catalog is the root.
81+
result._stac_io = self
82+
return result
83+
84+
def read_json(self, source: Union[str, "Link_Type"]) -> Dict[str, Any]:
85+
"""Read a dict from the given source.
86+
87+
See :func:`StacIO.read_text <pystac.StacIO.read_text>` for usage of
88+
str vs Link as a parameter.
89+
90+
Args:
91+
source (str or Link): The source from which to read.
92+
93+
Returns:
94+
dict: A dict representation of the JSON contained in the file at the
95+
given source.
96+
"""
97+
txt = self.read_text(source)
98+
return self._json_loads(txt, source)
99+
100+
def read_stac_object(self,
101+
source: Union[str, "Link_Type"],
102+
root: Optional["Catalog_Type"] = None) -> "STACObject_Type":
103+
"""Read a STACObject from a JSON file at the given source.
104+
105+
See :func:`StacIO.read_text <pystac.StacIO.read_text>` for usage of
106+
str vs Link as a parameter.
107+
108+
Args:
109+
source (str or pystac.Link): The source from which to read.
110+
root (Catalog or Collection): Optional root of the catalog for this object.
111+
If provided, the root's resolved object cache can be used to search for
112+
previously resolved instances of the STAC object.
113+
114+
Returns:
115+
STACObject: The deserialized STACObject from the serialized JSON
116+
contained in the file at the given uri.
117+
"""
118+
d = self.read_json(source)
119+
href = source if isinstance(source, str) else source.get_absolute_href()
120+
return self.stac_object_from_dict(d, href=href, root=root)
121+
122+
def save_json(self, dest: Union[str, "Link_Type"], json_dict: Dict[str, Any]) -> None:
123+
"""Write a dict to the given URI as JSON.
124+
125+
See :func:`StacIO.write_text <pystac.StacIO.write_text>` for usage of
126+
str vs Link as a parameter.
127+
128+
Args:
129+
dest (str or pystac.Link): The destination file to write the text to.
130+
json_dict (dict): The JSON dict to write.
131+
"""
132+
txt = self._json_dumps(json_dict, dest)
133+
self.write_text(dest, txt)
134+
135+
@classmethod
136+
def set_default(cls, stac_io_class: Type["StacIO"]) -> None:
137+
"""Set the default StacIO class; ``default()`` returns a new instance of it."""
138+
cls._default_io = stac_io_class
139+
140+
@classmethod
141+
def default(cls) -> "StacIO":
142+
if cls._default_io is None:
143+
cls._default_io = DefaultStacIO
144+
145+
return cls._default_io()
146+
147+
148+
class DefaultStacIO(StacIO):
149+
def read_text(self, source: Union[str, "Link_Type"], *args: Any, **kwargs: Any) -> str:
150+
if isinstance(source, str):
151+
href = source
152+
else:
153+
href = source.get_absolute_href()
154+
if href is None:
155+
raise IOError(f"Could not get an absolute HREF from link {source}")
156+
157+
parsed = urlparse(href)
25158
if parsed.scheme != '':
26159
try:
27-
with urlopen(uri) as f:
160+
with urlopen(href) as f:
28161
return f.read().decode('utf-8')
29162
except HTTPError as e:
30-
raise Exception("Could not read uri {}".format(uri)) from e
163+
raise Exception("Could not read uri {}".format(href)) from e
31164
else:
32-
with open(uri) as f:
165+
with open(href) as f:
33166
return f.read()
34167

35-
@staticmethod
36-
def default_write_text_method(uri: str, txt: str) -> None:
37-
"""Default method for writing text. Only handles local file paths."""
38-
dirname = os.path.dirname(uri)
168+
def write_text(self, dest: Union[str, "Link_Type"], txt: str, *args: Any,
169+
**kwargs: Any) -> None:
170+
if isinstance(dest, str):
171+
href = dest
172+
else:
173+
href = dest.get_absolute_href()
174+
if href is None:
175+
raise IOError(f"Could not get an absolute HREF from link {dest}")
176+
177+
dirname = os.path.dirname(href)
39178
if dirname != '' and not os.path.isdir(dirname):
40179
os.makedirs(dirname)
41-
with open(uri, 'w') as f:
180+
with open(href, 'w') as f:
42181
f.write(txt)
43182

44-
read_text_method: Callable[[str], str] = default_read_text_method
45-
"""Users of PySTAC can replace the read_text_method in order
46-
to expand the ability of PySTAC to read different file systems.
47-
For example, a client of the library might replace this class
48-
member in it's own __init__.py with a method that can read from
49-
cloud storage.
183+
184+
class DuplicateObjectKeyError(Exception):
185+
pass
186+
187+
188+
class DuplicateKeyReportingMixin(StacIO):
189+
"""A mixin for StacIO implementations that will report
190+
on duplicate keys in the JSON being read in.
191+
192+
See https://github.com/stac-utils/pystac/issues/313
50193
"""
194+
def _json_loads(self, txt: str, source: Union[str, "Link_Type"]) -> Dict[str, Any]:
195+
return json.loads(txt, object_pairs_hook=self.duplicate_object_names_report_builder(source))
196+
197+
@staticmethod
198+
def duplicate_object_names_report_builder(
199+
source: Union[str, "Link_Type"]) -> Callable[[List[Tuple[str, Any]]], Dict[str, Any]]:
200+
def report_duplicate_object_names(object_pairs: List[Tuple[str, Any]]) -> Dict[str, Any]:
201+
result: Dict[str, Any] = {}
202+
for key, value in object_pairs:
203+
if key in result:
204+
url = source if isinstance(source, str) else source.get_absolute_href()
205+
raise DuplicateObjectKeyError(f"Found duplicate object name “{key}” in “{url}”")
206+
else:
207+
result[key] = value
208+
return result
209+
210+
return report_duplicate_object_names
211+
212+
213+
class STAC_IO:
214+
"""DEPRECATED: Methods used to read and save STAC json.
215+
Allows users of the library to set their own methods
216+
(e.g. for reading and writing from cloud storage)
51217
52-
write_text_method: Callable[[str, str], None] = default_write_text_method
53-
"""Users of PySTAC can replace the write_text_method in order
54-
to expand the ability of PySTAC to write to different file systems.
55-
For example, a client of the library might replace this class
56-
member in it's own __init__.py with a method that can read from
57-
cloud storage.
218+
Note: The static methods of this class are deprecated. Move to using
219+
instance methods of a specific instance of StacIO.
58220
"""
221+
@staticmethod
222+
def read_text_method(uri: str) -> str:
223+
return StacIO.default().read_text(uri)
224+
225+
@staticmethod
226+
def write_text_method(uri: str, txt: str) -> None:
227+
"""Default method for writing text."""
228+
return StacIO.default().write_text(uri, txt)
59229

60230
@staticmethod
61231
def stac_object_from_dict(d: Dict[str, Any],

0 commit comments

Comments
 (0)