@@ -318,49 +318,195 @@ argument of most object-specific I/O methods. You can also use
318
318
:meth:`pystac.StacIO.set_default` in your client's ``__init__.py`` file to make this
319
319
sub-class the default :class:`pystac.StacIO` implementation throughout the library.
320
320
321
- For example, this code will allow
321
+ For example, the following code examples add support
322
322
for reading from AWS's S3 cloud object storage using `boto3
323
- <https://boto3.amazonaws.com/v1/documentation/api/latest/index.html> `__:
324
-
325
- .. code-block :: python
326
-
327
- from urllib.parse import urlparse
328
- import boto3
329
- from pystac import Link
330
- from pystac.stac_io import DefaultStacIO, StacIO
331
- from typing import Union, Any
332
-
333
- class CustomStacIO (DefaultStacIO ):
334
- def __init__ (self ):
335
- self .s3 = boto3.resource(" s3" )
336
- super ().__init__ ()
337
-
338
- def read_text (
339
- self , source : Union[str , Link], * args : Any, ** kwargs : Any
340
- ) -> str :
341
- parsed = urlparse(source)
342
- if parsed.scheme == " s3" :
343
- bucket = parsed.netloc
344
- key = parsed.path[1 :]
345
-
346
- obj = self .s3.Object(bucket, key)
347
- return obj.get()[" Body" ].read().decode(" utf-8" )
348
- else :
349
- return super ().read_text(source, * args, ** kwargs)
350
-
351
- def write_text (
352
- self , dest : Union[str , Link], txt : str , * args : Any, ** kwargs : Any
353
- ) -> None :
354
- parsed = urlparse(dest)
355
- if parsed.scheme == " s3" :
356
- bucket = parsed.netloc
357
- key = parsed.path[1 :]
358
- self .s3.Object(bucket, key).put(Body = txt, ContentEncoding = " utf-8" )
359
- else :
360
- super ().write_text(dest, txt, * args, ** kwargs)
361
-
362
- StacIO.set_default(CustomStacIO)
363
-
323
+ <https://boto3.amazonaws.com/v1/documentation/api/latest/index.html>`__
324
+ or Azure Blob Storage using the `Azure SDK for Python
325
+ <https://learn.microsoft.com/en-us/python/api/overview/azure/storage-blob-readme?view=azure-python>`__:
326
+
327
+ .. tab-set::
328
+ .. tab-item:: AWS S3
329
+
330
+ .. code-block:: python
331
+
332
+ from urllib.parse import urlparse
333
+ import boto3
334
+ from pystac import Link
335
+ from pystac.stac_io import DefaultStacIO, StacIO
336
+ from typing import Union, Any
337
+
338
class CustomStacIO(DefaultStacIO):
    """A custom StacIO class that reads and writes ``s3://`` HREFs with boto3.

    HREFs with any other scheme are delegated to ``DefaultStacIO``.
    """

    def __init__(self):
        self.s3 = boto3.resource("s3")
        super().__init__()

    def read_text(
        self, source: Union[str, Link], *args: Any, **kwargs: Any
    ) -> str:
        """Return the text stored at ``source``, fetching ``s3://`` HREFs via boto3."""
        # ``urlparse`` only accepts strings, so resolve a Link to its HREF first.
        href = source.href if isinstance(source, Link) else source
        parsed = urlparse(href)
        if parsed.scheme == "s3":
            bucket = parsed.netloc
            key = parsed.path[1:]  # drop the leading "/" of the URL path

            obj = self.s3.Object(bucket, key)
            return obj.get()["Body"].read().decode("utf-8")
        else:
            return super().read_text(source, *args, **kwargs)

    def write_text(
        self, dest: Union[str, Link], txt: str, *args: Any, **kwargs: Any
    ) -> None:
        """Write ``txt`` to ``dest``, uploading to S3 when ``dest`` is an ``s3://`` HREF."""
        # ``urlparse`` only accepts strings, so resolve a Link to its HREF first.
        href = dest.href if isinstance(dest, Link) else dest
        parsed = urlparse(href)
        if parsed.scheme == "s3":
            bucket = parsed.netloc
            key = parsed.path[1:]  # drop the leading "/" of the URL path
            self.s3.Object(bucket, key).put(Body=txt, ContentEncoding="utf-8")
        else:
            super().write_text(dest, txt, *args, **kwargs)


StacIO.set_default(CustomStacIO)
368
+
369
+ .. tab-item:: Azure Blob Storage
370
+
371
+ .. code-block:: python
372
+
373
import os
import re
from typing import Any, Dict, Optional, Tuple, Union
from urllib.parse import urlparse

from azure.core.credentials import (
    AzureNamedKeyCredential,
    AzureSasCredential,
    TokenCredential,
)
from azure.storage.blob import BlobClient, ContentSettings
from pystac import Link
from pystac.stac_io import DefaultStacIO, StacIO
386
+
387
# Matches the HTTPS endpoint of an Azure Blob Storage account; the group
# captures the storage account name.
BLOB_HTTPS_URI_PATTERN = r"https:\/\/(.+?)\.blob\.core\.windows\.net"

# Types allowed for ``BlobStacIO.credential``, which is handed to ``BlobClient``.
AzureCredentialType = Union[
    str,
    Dict[str, str],
    AzureNamedKeyCredential,
    AzureSasCredential,
    TokenCredential,
]


class BlobStacIO(DefaultStacIO):
    """A custom StacIO class for reading and writing STAC objects
    from/to Azure Blob storage.

    Configuration lives in class attributes, so it is shared by every
    instance. ``conn_str`` is read from the environment once, when the class
    is defined, and takes precedence over ``account_url``/``credential``.
    """

    conn_str: Optional[str] = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
    account_url: Optional[str] = None
    credential: Optional[AzureCredentialType] = None
    overwrite: bool = True

    def _is_blob_uri(self, href: str) -> bool:
        """Check if href is an ``abfs://`` URI or contains a Blob HTTPS endpoint."""
        return (
            href.startswith("abfs://")
            or re.search(BLOB_HTTPS_URI_PATTERN, href) is not None
        )

    def _parse_blob_uri(self, uri: str) -> Tuple[str, str]:
        """Parse the container and blob name from a Blob URI.

        Parameters
        ----------
        uri
            An Azure Blob URI.

        Returns
        -------
        The container and blob names.
        """
        if uri.startswith("abfs://"):
            # Turn "abfs://container/blob" into "/container/blob" so both URI
            # flavours share the same path layout.
            path = uri.replace("abfs://", "/")
        else:
            path = urlparse(uri).path

        # parts[0] is the empty string before the leading "/".
        parts = path.split("/")
        container = parts[1]
        blob = "/".join(parts[2:])
        return container, blob

    def _get_blob_client(self, uri: str) -> BlobClient:
        """Instantiate a `BlobClient` for the blob that a URI points to.

        Parameters
        ----------
        uri
            An Azure Blob URI.

        Returns
        -------
        A `BlobClient` for the container and blob parsed from `uri`.

        Raises
        ------
        ValueError
            If neither `conn_str` nor `account_url` is set.
        """
        container, blob = self._parse_blob_uri(uri)

        if self.conn_str:
            return BlobClient.from_connection_string(
                self.conn_str,
                container_name=container,
                blob_name=blob,
            )
        elif self.account_url:
            return BlobClient(
                account_url=self.account_url,
                container_name=container,
                blob_name=blob,
                credential=self.credential,
            )
        else:
            raise ValueError(
                "Must set conn_str or account_url (and credential if required)"
            )

    def read_text(self, source: Union[str, Link], *args: Any, **kwargs: Any) -> str:
        """Read STAC Objects from Blob storage, or defer to `DefaultStacIO`."""
        if isinstance(source, Link):
            source = source.href
        if self._is_blob_uri(source):
            blob_client = self._get_blob_client(source)
            return blob_client.download_blob().readall().decode("utf-8")
        else:
            return super().read_text(source, *args, **kwargs)

    def write_text(
        self, dest: Union[str, Link], txt: str, *args: Any, **kwargs: Any
    ) -> None:
        """Write STAC Objects to Blob storage. Note: overwrites by default."""
        if isinstance(dest, Link):
            dest = dest.href
        if self._is_blob_uri(dest):
            blob_client = self._get_blob_client(dest)
            blob_client.upload_blob(
                txt,
                overwrite=self.overwrite,
                content_settings=ContentSettings(content_type="application/json"),
            )
        else:
            super().write_text(dest, txt, *args, **kwargs)
496
+
497
+
498
# set Blob storage connection string (defaults to the
# AZURE_STORAGE_CONNECTION_STRING environment variable); when set, it takes
# precedence over account_url
BlobStacIO.conn_str = "my-storage-connection-string"

# OR set Blob account URL, credential; these are only used when conn_str is
# empty, so leave conn_str unset to take this path
BlobStacIO.account_url = "https://myblobstorageaccount.blob.core.windows.net"
BlobStacIO.credential = AzureSasCredential("my-sas-token")

# modify overwrite behavior
BlobStacIO.overwrite = False

# set BlobStacIO as default StacIO
StacIO.set_default(BlobStacIO)
364
510
365
511
If you only need to customize read operations you can inherit from
366
512
:class:`~pystac.stac_io.DefaultStacIO` and only override the read method. For example,
0 commit comments