@@ -225,72 +225,96 @@ written (e.g. if you are working with self-contained catalogs).
225
225
226
226
.. _using stac_io :
227
227
228
- Using STAC_IO
228
+ I/O in PySTAC
229
229
=============
230
230
231
- The :class: `~pystac.STAC_IO ` class is the way PySTAC reads and writes text from file
232
- locations. Since PySTAC aims to be dependency-free, there is no default mechanisms to
233
- read and write from anything but the local file system. However, users of PySTAC may
234
- want to read and write from other file systems, such as HTTP or cloud object storage.
235
- STAC_IO allows users to hook into PySTAC and define their own reading and writing
236
- primitives to allow for those use cases.
237
-
238
- To enable reading from other types of file systems, it is recommended that in the
239
- `__init__.py ` of the client module, or at the beginning of the script using PySTAC, you
240
- overwrite the :func: `STAC_IO.read_text_method <pystac.STAC_IO.read_text_method> ` and
241
- :func: `STAC_IO.write_text_method <pystac.STAC_IO.write_text_method> ` members of STAC_IO
242
- with functions that read and write however you need. For example, this code will allow
231
+ The :class: `pystac.StacIO ` class defines fundamental methods for I/O
232
+ operations within PySTAC, including serialization and deserialization to and from
233
+ JSON files and conversion to and from Python dictionaries. This is an abstract class
234
+ and should not be instantiated directly. However, PySTAC provides a
235
+ :class: `pystac.stac_io.DefaultStacIO ` class with minimal implementations of these
236
+ methods. This default implementation provides support for reading and writing files
237
+ from the local filesystem as well as HTTP URIs (using ``urllib ``). This class is
238
+ created automatically by all of the object-specific I/O methods (e.g.
239
+ :meth: `pystac.Catalog.from_file `), so most users will not need to instantiate this
240
+ class themselves.
241
+
242
+ If you require custom logic for I/O operations or would like to use a 3rd-party library
243
+ for I/O operations (e.g. ``requests ``), you can create a sub-class of
244
+ :class:`pystac.StacIO` (or :class:`pystac.stac_io.DefaultStacIO`) and customize the methods as
245
+ you see fit. You can then pass instances of this custom sub-class into the ``stac_io ``
246
+ argument of most object-specific I/O methods. You can also use
247
+ :meth: `pystac.StacIO.set_default ` in your client's ``__init__.py `` file to make this
248
+ sub-class the default :class: `pystac.StacIO ` implementation throughout the library.
249
+
250
+ For example, this code will allow
243
251
for reading from AWS's S3 cloud object storage using `boto3
244
- <https://boto3.amazonaws.com/v1/documentation/api/latest/index.html> `_ :
252
+ <https://boto3.amazonaws.com/v1/documentation/api/latest/index.html> `__ :
245
253
246
254
.. code-block :: python
247
255
248
256
from urllib.parse import urlparse
249
257
import boto3
250
- from pystac import STAC_IO
251
-
252
- def my_read_method (uri ):
253
- parsed = urlparse(uri)
254
- if parsed.scheme == ' s3' :
255
- bucket = parsed.netloc
256
- key = parsed.path[1 :]
257
- s3 = boto3.resource(' s3' )
258
- obj = s3.Object(bucket, key)
259
- return obj.get()[' Body' ].read().decode(' utf-8' )
260
- else :
261
- return STAC_IO .default_read_text_method(uri)
262
-
263
- def my_write_method (uri , txt ):
264
- parsed = urlparse(uri)
265
- if parsed.scheme == ' s3' :
266
- bucket = parsed.netloc
267
- key = parsed.path[1 :]
268
- s3 = boto3.resource(" s3" )
269
- s3.Object(bucket, key).put(Body = txt)
270
- else :
271
- STAC_IO .default_write_text_method(uri, txt)
272
-
273
- STAC_IO .read_text_method = my_read_method
274
- STAC_IO .write_text_method = my_write_method
275
-
276
- If you are only going to read from another source, e.g. HTTP, you could only replace the
277
- read method. For example, using the `requests library
278
- <https://requests.kennethreitz.org/en/master> `_:
258
+ from pystac import Link
259
+ from pystac.stac_io import DefaultStacIO, StacIO
260
+
261
# ``Any`` and ``Union`` appear in the method annotations below.
from typing import Any, Union


class CustomStacIO(DefaultStacIO):
    """StacIO implementation that reads and writes ``s3://`` HREFs via boto3.

    HREFs with any other scheme are delegated to ``DefaultStacIO``.
    """

    def __init__(self) -> None:
        super().__init__()
        # A single S3 resource shared by read_text and write_text.
        self.s3 = boto3.resource("s3")

    def read_text(
        self, source: Union[str, Link], *args: Any, **kwargs: Any
    ) -> str:
        """Return the text stored at ``source``.

        Args:
            source: An HREF string or a ``Link`` pointing at the document.
            *args, **kwargs: Passed through to ``DefaultStacIO.read_text``
                for non-S3 sources.
        """
        # ``source`` may be a Link; resolve it to an HREF string first.
        href = source if isinstance(source, str) else source.get_absolute_href()
        # An unresolved HREF parses to an empty scheme and falls through
        # to the default implementation.
        parsed = urlparse(href or "")
        if parsed.scheme == "s3":
            bucket = parsed.netloc
            key = parsed.path[1:]  # drop the leading "/"
            obj = self.s3.Object(bucket, key)
            return obj.get()["Body"].read().decode("utf-8")
        return super().read_text(source, *args, **kwargs)

    def write_text(
        self, dest: Union[str, Link], txt: str, *args: Any, **kwargs: Any
    ) -> None:
        """Write ``txt`` to ``dest``.

        Args:
            dest: An HREF string or a ``Link`` naming the destination.
            txt: The text to write.
            *args, **kwargs: Passed through to ``DefaultStacIO.write_text``
                for non-S3 destinations.
        """
        href = dest if isinstance(dest, str) else dest.get_absolute_href()
        parsed = urlparse(href or "")
        if parsed.scheme == "s3":
            bucket = parsed.netloc
            key = parsed.path[1:]  # drop the leading "/"
            # Reuse the resource created in __init__ rather than a new one.
            self.s3.Object(bucket, key).put(Body=txt, ContentEncoding="utf-8")
        else:
            super().write_text(dest, txt, *args, **kwargs)


# Make CustomStacIO the default StacIO implementation.
StacIO.set_default(CustomStacIO)
291
+
292
+
293
+ If you only need to customize read operations you can inherit from
294
+ :class:`~pystac.stac_io.DefaultStacIO` and override only the read method. For example,
295
+ to take advantage of connection pooling using a `requests.Session
296
+ <https://requests.readthedocs.io/en/latest/user/advanced/#session-objects>`__:
279
297
280
298
.. code-block :: python
281
299
282
300
from urllib.parse import urlparse
283
301
import requests
284
- from pystac import STAC_IO
285
-
286
- def my_read_method (uri ):
287
- parsed = urlparse(uri)
288
- if parsed.scheme.startswith(' http' ):
289
- return requests.get(uri).text
290
- else :
291
- return STAC_IO .default_read_text_method(uri)
292
-
293
- STAC_IO .read_text_method = my_read_method
302
+ from pystac.stac_io import DefaultStacIO, StacIO
303
+
304
# ``Any``, ``Union`` and ``Link`` appear in the method annotations below.
from typing import Any, Union

from pystac import Link


class ConnectionPoolingIO(DefaultStacIO):
    """StacIO implementation that reads HTTP(S) HREFs through one shared
    ``requests.Session``.

    HREFs with any other scheme, and all writes, are handled by
    ``DefaultStacIO``.
    """

    def __init__(self) -> None:
        super().__init__()
        # One session for every read made through this instance.
        self.session = requests.Session()

    def read_text(
        self, source: Union[str, Link], *args: Any, **kwargs: Any
    ) -> str:
        """Return the text stored at ``source``.

        Args:
            source: An HREF string or a ``Link`` pointing at the document.
            *args, **kwargs: Passed through to ``DefaultStacIO.read_text``
                for non-HTTP sources.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # ``source`` may be a Link; resolve it to an HREF string first.
        href = source if isinstance(source, str) else source.get_absolute_href()
        # An unresolved HREF parses to an empty scheme and falls through
        # to the default implementation.
        if urlparse(href or "").scheme.startswith("http"):
            response = self.session.get(href)
            # Surface HTTP errors instead of returning an error page as text.
            response.raise_for_status()
            return response.text
        return super().read_text(source, *args, **kwargs)


# Make ConnectionPoolingIO the default StacIO implementation.
StacIO.set_default(ConnectionPoolingIO)
294
318
295
319
Validation
296
320
==========
0 commit comments