|
| 1 | +from abc import ABC, abstractmethod |
1 | 2 | import os
|
2 | 3 | import json
|
3 |
| -from typing import Any, Callable, Dict, Optional, TYPE_CHECKING |
| 4 | +from typing import Any, Callable, Dict, List, Optional, TYPE_CHECKING, Tuple, Type, Union |
4 | 5 |
|
5 | 6 | from urllib.parse import urlparse
|
6 | 7 | from urllib.request import urlopen
|
7 | 8 | from urllib.error import HTTPError
|
8 | 9 |
|
| 10 | +import pystac as ps |
9 | 11 | import pystac.serialization
|
10 | 12 |
|
| 13 | +# Use orjson if available |
| 14 | +try: |
| 15 | + import orjson |
| 16 | +except ImportError: |
| 17 | + orjson = None |
| 18 | + |
11 | 19 | if TYPE_CHECKING:
|
12 | 20 | from pystac.stac_object import STACObject as STACObject_Type
|
13 | 21 | from pystac.catalog import Catalog as Catalog_Type
|
| 22 | + from pystac.link import Link as Link_Type |
14 | 23 |
|
15 | 24 |
|
16 |
| -class STAC_IO: |
17 |
| - """Methods used to read and save STAC json. |
18 |
| - Allows users of the library to set their own methods |
19 |
| - (e.g. for reading and writing from cloud storage) |
20 |
| - """ |
21 |
| - @staticmethod |
22 |
| - def default_read_text_method(uri: str) -> str: |
23 |
| - """Default method for reading text. Only handles local file paths.""" |
24 |
| - parsed = urlparse(uri) |
class StacIO(ABC):
    """Abstract base class for reading and writing STAC JSON.

    Subclasses implement :meth:`read_text` and :meth:`write_text`; the JSON
    and STAC object helpers defined here are built on top of those two methods.
    """

    # The StacIO subclass instantiated by ``default()``. Left as None until
    # ``set_default()`` is called or ``default()`` falls back to DefaultStacIO.
    _default_io: Optional[Type["StacIO"]] = None

    @abstractmethod
    def read_text(self, source: Union[str, "Link_Type"], *args: Any, **kwargs: Any) -> str:
        """Read text from the given source.

        The source to read from can be specified as a string or a Link. If it's
        a string, it's the URL or HREF from which to read. When reading links,
        PySTAC will pass in the entire link body. This enables implementations
        to utilize additional link information, e.g. the "post" information in
        a pagination link from a STAC API search.

        Args:
            source (str or pystac.Link): The source to read from.

        Returns:
            str: The text contained in the file at the location specified by
                the source.
        """
        raise NotImplementedError("read_text not implemented")

    @abstractmethod
    def write_text(self, dest: Union[str, "Link_Type"], txt: str, *args: Any,
                   **kwargs: Any) -> None:
        """Write the given text to a file at the given destination.

        The destination to write to can be specified as a string or a Link.
        If it's a string, it's the URL or HREF to which to write. When writing
        based on links, PySTAC will pass in the entire link body.

        Args:
            dest (str or pystac.Link): The destination to write to.
            txt (str): The text to write.
        """
        raise NotImplementedError("write_text not implemented")

    def _json_loads(self, txt: str, source: Union[str, "Link_Type"]) -> Dict[str, Any]:
        """Parse JSON text into a dict, using orjson when it is available.

        Args:
            txt (str): The JSON text to parse. This is the already-read
                content, not a location to read from.
            source (str or pystac.Link): Where ``txt`` came from. Unused by
                this implementation; overriding implementations may use it.

        Returns:
            dict: The parsed JSON.
        """
        if orjson is not None:
            return orjson.loads(txt)
        # ``txt`` already holds the JSON content; parse it directly rather
        # than treating it as a URI and reading it a second time.
        return json.loads(txt)

    def _json_dumps(self, json_dict: Dict[str, Any], source: Union[str, "Link_Type"]) -> str:
        """Serialize a dict to 2-space indented JSON text.

        Args:
            json_dict (dict): The JSON dict to serialize.
            source (str or pystac.Link): The location the text is associated
                with. Unused by this implementation.

        Returns:
            str: The serialized JSON text.
        """
        if orjson is not None:
            # orjson.dumps returns bytes, so decode to match the str contract.
            return orjson.dumps(json_dict, option=orjson.OPT_INDENT_2).decode('utf-8')
        return json.dumps(json_dict, indent=2)

    def stac_object_from_dict(self,
                              d: Dict[str, Any],
                              href: Optional[str] = None,
                              root: Optional["Catalog_Type"] = None) -> "STACObject_Type":
        """Deserialize a STAC object from a dict.

        Delegates to ``pystac.serialization.stac_object_from_dict``. If the
        result is a Catalog, this StacIO instance is attached to it.

        Args:
            d (dict): The dict to deserialize.
            href (str): Optional HREF passed through to the deserializer.
            root (Catalog): Optional root passed through to the deserializer.

        Returns:
            STACObject: The deserialized object.
        """
        result = pystac.serialization.stac_object_from_dict(d, href, root)
        if isinstance(result, ps.Catalog):
            # Set the stac_io instance for usage by io operations
            # where this catalog is the root.
            result._stac_io = self
        return result

    def read_json(self, source: Union[str, "Link_Type"]) -> Dict[str, Any]:
        """Read a dict from the given source.

        See :func:`StacIO.read_text <pystac.StacIO.read_text>` for usage of
        str vs Link as a parameter.

        Args:
            source (str or Link): The source from which to read.

        Returns:
            dict: A dict representation of the JSON contained in the file at the
            given source.
        """
        txt = self.read_text(source)
        return self._json_loads(txt, source)

    def read_stac_object(self,
                         source: Union[str, "Link_Type"],
                         root: Optional["Catalog_Type"] = None) -> "STACObject_Type":
        """Read a STACObject from a JSON file at the given source.

        See :func:`StacIO.read_text <pystac.StacIO.read_text>` for usage of
        str vs Link as a parameter.

        Args:
            source (str or pystac.Link): The source from which to read.
            root (Catalog or Collection): Optional root of the catalog for this object.
                If provided, the root's resolved object cache can be used to search for
                previously resolved instances of the STAC object.

        Returns:
            STACObject: The deserialized STACObject from the serialized JSON
            contained in the file at the given source.
        """
        d = self.read_json(source)
        href = source if isinstance(source, str) else source.get_absolute_href()
        return self.stac_object_from_dict(d, href=href, root=root)

    def save_json(self, dest: Union[str, "Link_Type"], json_dict: Dict[str, Any]) -> None:
        """Write a dict to the given destination as JSON.

        See :func:`StacIO.write_text <pystac.StacIO.write_text>` for usage of
        str vs Link as a parameter.

        Args:
            dest (str or pystac.Link): The destination file to write the text to.
            json_dict (dict): The JSON dict to write.
        """
        txt = self._json_dumps(json_dict, dest)
        self.write_text(dest, txt)

    @classmethod
    def set_default(cls, stac_io_class: Type["StacIO"]) -> None:
        """Set the StacIO class that :meth:`default` instantiates.

        Args:
            stac_io_class (type): The StacIO subclass to use as the default.
        """
        cls._default_io = stac_io_class

    @classmethod
    def default(cls) -> "StacIO":
        """Return a new instance of the default StacIO class.

        Falls back to ``DefaultStacIO`` when no default has been set.
        """
        if cls._default_io is None:
            cls._default_io = DefaultStacIO

        return cls._default_io()
| 146 | + |
| 147 | + |
class DefaultStacIO(StacIO):
    """StacIO implementation that reads via ``urlopen`` or the local
    filesystem and writes to the local filesystem."""

    def read_text(self, source: Union[str, "Link_Type"], *args: Any, **kwargs: Any) -> str:
        """Read UTF-8 text from a URL or a local file path.

        Args:
            source (str or pystac.Link): The HREF to read, or a link whose
                absolute HREF is read.

        Returns:
            str: The text read from the source.

        Raises:
            IOError: If a link is given and it has no absolute HREF.
            Exception: If an HTTPError occurs while reading a URL.
        """
        if isinstance(source, str):
            href = source
        else:
            href = source.get_absolute_href()
            if href is None:
                raise IOError(f"Could not get an absolute HREF from link {source}")

        parsed = urlparse(href)
        if parsed.scheme != '':
            # Anything with a URL scheme is fetched through urllib.
            try:
                with urlopen(href) as f:
                    return f.read().decode('utf-8')
            except HTTPError as e:
                raise Exception(f"Could not read uri {href}") from e
        else:
            # Decode explicitly as UTF-8 so local reads match the URL branch
            # instead of depending on the platform's default encoding.
            with open(href, encoding='utf-8') as f:
                return f.read()

    def write_text(self, dest: Union[str, "Link_Type"], txt: str, *args: Any,
                   **kwargs: Any) -> None:
        """Write text to a local file as UTF-8, creating parent directories.

        Args:
            dest (str or pystac.Link): The path to write, or a link whose
                absolute HREF is written to.
            txt (str): The text to write.

        Raises:
            IOError: If a link is given and it has no absolute HREF.
        """
        if isinstance(dest, str):
            href = dest
        else:
            href = dest.get_absolute_href()
            if href is None:
                raise IOError(f"Could not get an absolute HREF from link {dest}")

        dirname = os.path.dirname(href)
        if dirname != '':
            # exist_ok avoids failing if the directory appears between a
            # separate existence check and its creation.
            os.makedirs(dirname, exist_ok=True)
        # Encode explicitly as UTF-8 so non-ASCII text is written the same
        # way on every platform.
        with open(href, 'w', encoding='utf-8') as f:
            f.write(txt)
|
43 | 182 |
|
44 |
| - read_text_method: Callable[[str], str] = default_read_text_method |
45 |
| - """Users of PySTAC can replace the read_text_method in order |
46 |
| - to expand the ability of PySTAC to read different file systems. |
47 |
| - For example, a client of the library might replace this class |
48 |
| - member in it's own __init__.py with a method that can read from |
49 |
| - cloud storage. |
| 183 | + |
class DuplicateObjectKeyError(Exception):
    """Raised when a JSON object being read contains the same key more than once."""
| 186 | + |
| 187 | + |
class DuplicateKeyReportingMixin(StacIO):
    """Mixin for StacIO implementations that rejects JSON containing
    duplicate object keys instead of silently keeping one of the values.

    See https://github.com/stac-utils/pystac/issues/313
    """

    def _json_loads(self, txt: str, source: Union[str, "Link_Type"]) -> Dict[str, Any]:
        """Parse ``txt`` with the standard ``json`` module, raising
        DuplicateObjectKeyError if any JSON object repeats a key."""
        pairs_hook = self.duplicate_object_names_report_builder(source)
        return json.loads(txt, object_pairs_hook=pairs_hook)

    @staticmethod
    def duplicate_object_names_report_builder(
            source: Union[str, "Link_Type"]) -> Callable[[List[Tuple[str, Any]]], Dict[str, Any]]:
        """Build an ``object_pairs_hook`` for ``json.loads`` that names
        ``source`` in the error raised for a duplicated key."""

        def report_duplicate_object_names(object_pairs: List[Tuple[str, Any]]) -> Dict[str, Any]:
            seen: Dict[str, Any] = {}
            for name, val in object_pairs:
                if name not in seen:
                    seen[name] = val
                    continue
                # Resolve the location only when there is an error to report.
                if isinstance(source, str):
                    url = source
                else:
                    url = source.get_absolute_href()
                raise DuplicateObjectKeyError(f"Found duplicate object name “{name}” in “{url}”")
            return seen

        return report_duplicate_object_names
| 211 | + |
| 212 | + |
| 213 | +class STAC_IO: |
| 214 | + """DEPRECATED: Methods used to read and save STAC json. |
| 215 | + Allows users of the library to set their own methods |
| 216 | + (e.g. for reading and writing from cloud storage) |
51 | 217 |
|
52 |
| - write_text_method: Callable[[str, str], None] = default_write_text_method |
53 |
| - """Users of PySTAC can replace the write_text_method in order |
54 |
| - to expand the ability of PySTAC to write to different file systems. |
55 |
| - For example, a client of the library might replace this class |
56 |
| - member in it's own __init__.py with a method that can read from |
57 |
| - cloud storage. |
| 218 | + Note: The static methods of this class are deprecated. Move to using |
| 219 | + instance methods of a specific instance of StacIO. |
58 | 220 | """
|
| 221 | + @staticmethod |
| 222 | + def read_text_method(uri: str) -> str: |
| 223 | + return StacIO.default().read_text(uri) |
| 224 | + |
| 225 | + @staticmethod |
| 226 | + def write_text_method(uri: str, txt: str) -> None: |
| 227 | + """Default method for writing text.""" |
| 228 | + return StacIO.default().write_text(uri, txt) |
59 | 229 |
|
60 | 230 | @staticmethod
|
61 | 231 | def stac_object_from_dict(d: Dict[str, Any],
|
|
0 commit comments