Skip to content

Commit 522c1b9

Browse files
bmcandr and gadomski authored
Add example of custom StacIO for Azure Blob storage (#1372)
* Add example of custom StacIO for Azure Blob storage
* Add #1372 to changelog
* Move implicit vars to class attributes, improve Blob URI handling

---------

Co-authored-by: Pete Gadomski <pete.gadomski@gmail.com>
1 parent 65ea9a9 commit 522c1b9

File tree

2 files changed

+189
-42
lines changed

2 files changed

+189
-42
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
- Allow object ID as input for getting APILayoutStrategy hrefs and add `items`, `collections`, `search`, `conformance`, `service_desc` and `service_doc` href methods ([#1335](https://github.com/stac-utils/pystac/pull/1335))
88
- Updated classification extension to v2.0.0 ([#1359](https://github.com/stac-utils/pystac/pull/1359))
99
- Update docstring of `name` argument to `Classification.apply` and `Classification.create` to agree with extension specification ([#1356](https://github.com/stac-utils/pystac/pull/1356))
10+
- Add example of custom `StacIO` for Azure Blob Storage to docs ([#1372](https://github.com/stac-utils/pystac/pull/1372))
1011

1112
### Fixed
1213

docs/concepts.rst

Lines changed: 188 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -318,49 +318,195 @@ argument of most object-specific I/O methods. You can also use
318318
:meth:`pystac.StacIO.set_default` in your client's ``__init__.py`` file to make this
319319
sub-class the default :class:`pystac.StacIO` implementation throughout the library.
320320

321-
For example, this code will allow
321+
For example, the following code examples add support
322322
for reading from AWS's S3 cloud object storage using `boto3
323-
<https://boto3.amazonaws.com/v1/documentation/api/latest/index.html>`__:
324-
325-
.. code-block:: python
326-
327-
from urllib.parse import urlparse
328-
import boto3
329-
from pystac import Link
330-
from pystac.stac_io import DefaultStacIO, StacIO
331-
from typing import Union, Any
332-
333-
class CustomStacIO(DefaultStacIO):
334-
def __init__(self):
335-
self.s3 = boto3.resource("s3")
336-
super().__init__()
337-
338-
def read_text(
339-
self, source: Union[str, Link], *args: Any, **kwargs: Any
340-
) -> str:
341-
parsed = urlparse(source)
342-
if parsed.scheme == "s3":
343-
bucket = parsed.netloc
344-
key = parsed.path[1:]
345-
346-
obj = self.s3.Object(bucket, key)
347-
return obj.get()["Body"].read().decode("utf-8")
348-
else:
349-
return super().read_text(source, *args, **kwargs)
350-
351-
def write_text(
352-
self, dest: Union[str, Link], txt: str, *args: Any, **kwargs: Any
353-
) -> None:
354-
parsed = urlparse(dest)
355-
if parsed.scheme == "s3":
356-
bucket = parsed.netloc
357-
key = parsed.path[1:]
358-
self.s3.Object(bucket, key).put(Body=txt, ContentEncoding="utf-8")
359-
else:
360-
super().write_text(dest, txt, *args, **kwargs)
361-
362-
StacIO.set_default(CustomStacIO)
363-
323+
<https://boto3.amazonaws.com/v1/documentation/api/latest/index.html>`__
324+
or Azure Blob Storage using the `Azure SDK for Python
325+
<https://learn.microsoft.com/en-us/python/api/overview/azure/storage-blob-readme?view=azure-python>`__:
326+
327+
.. tab-set::
328+
.. tab-item:: AWS S3
329+
330+
.. code-block:: python
331+
332+
from urllib.parse import urlparse
333+
import boto3
334+
from pystac import Link
335+
from pystac.stac_io import DefaultStacIO, StacIO
336+
from typing import Union, Any
337+
338+
class CustomStacIO(DefaultStacIO):
339+
def __init__(self):
340+
self.s3 = boto3.resource("s3")
341+
super().__init__()
342+
343+
def read_text(
344+
self, source: Union[str, Link], *args: Any, **kwargs: Any
345+
) -> str:
346+
parsed = urlparse(source)
347+
if parsed.scheme == "s3":
348+
bucket = parsed.netloc
349+
key = parsed.path[1:]
350+
351+
obj = self.s3.Object(bucket, key)
352+
return obj.get()["Body"].read().decode("utf-8")
353+
else:
354+
return super().read_text(source, *args, **kwargs)
355+
356+
def write_text(
357+
self, dest: Union[str, Link], txt: str, *args: Any, **kwargs: Any
358+
) -> None:
359+
parsed = urlparse(dest)
360+
if parsed.scheme == "s3":
361+
bucket = parsed.netloc
362+
key = parsed.path[1:]
363+
self.s3.Object(bucket, key).put(Body=txt, ContentEncoding="utf-8")
364+
else:
365+
super().write_text(dest, txt, *args, **kwargs)
366+
367+
StacIO.set_default(CustomStacIO)
368+
369+
.. tab-item:: Azure Blob Storage
370+
371+
.. code-block:: python
372+
373+
import os
374+
import re
375+
from typing import Any, Dict, Optional, Tuple, Union
376+
from urllib.parse import urlparse
377+
378+
from azure.core.credentials import (
379+
AzureNamedKeyCredential,
380+
AzureSasCredential,
381+
TokenCredential,
382+
)
383+
from azure.storage.blob import BlobClient, ContentSettings
384+
from pystac import Link
385+
from pystac.stac_io import DefaultStacIO, StacIO
386+
387+
BLOB_HTTPS_URI_PATTERN = r"https:\/\/(.+?)\.blob\.core\.windows\.net"
388+
389+
AzureCredentialType = Union[
390+
str,
391+
Dict[str, str],
392+
AzureNamedKeyCredential,
393+
AzureSasCredential,
394+
TokenCredential,
395+
]
396+
397+
398+
class BlobStacIO(DefaultStacIO):
399+
"""A custom StacIO class for reading and writing STAC objects
400+
from/to Azure Blob storage.
401+
"""
402+
403+
conn_str: Optional[str] = os.getenv("AZURE_STORAGE_CONNECTION_STRING")
404+
account_url: Optional[str] = None
405+
credential: Optional[AzureCredentialType] = None
406+
overwrite: bool = True
407+
408+
def _is_blob_uri(self, href: str) -> bool:
409+
"""Check if href is an Azure Blob HTTPS URI or an ``abfs://`` URI."""
410+
if re.search(
411+
re.compile(BLOB_HTTPS_URI_PATTERN), href
412+
) is not None or href.startswith("abfs://"):
413+
return True
414+
else:
415+
return False
416+
417+
def _parse_blob_uri(self, uri: str) -> Tuple[str, str]:
418+
"""Parse the container and blob name from a Blob URI.
419+
420+
Parameters
421+
----------
422+
uri
423+
An Azure Blob URI.
424+
425+
Returns
426+
-------
427+
The container and blob names.
428+
"""
429+
if uri.startswith("abfs://"):
430+
path = uri.replace("abfs://", "/")
431+
else:
432+
path = urlparse(uri).path
433+
434+
parts = path.split("/")
435+
container = parts[1]
436+
blob = "/".join(parts[2:])
437+
return container, blob
438+
439+
def _get_blob_client(self, uri: str) -> BlobClient:
440+
"""Instantiate a `BlobClient` for the blob referenced by a Blob URI.
441+
442+
Parameters
443+
----------
444+
uri
445+
An Azure Blob URI.
446+
447+
Returns
448+
-------
449+
A `BlobClient` for interacting with `blob` in `container`.
450+
"""
451+
container, blob = self._parse_blob_uri(uri)
452+
453+
if self.conn_str:
454+
return BlobClient.from_connection_string(
455+
self.conn_str,
456+
container_name=container,
457+
blob_name=blob,
458+
)
459+
elif self.account_url:
460+
return BlobClient(
461+
account_url=self.account_url,
462+
container_name=container,
463+
blob_name=blob,
464+
credential=self.credential,
465+
)
466+
else:
467+
raise ValueError(
468+
"Must set conn_str or account_url (and credential if required)"
469+
)
470+
471+
def read_text(self, source: Union[str, Link], *args: Any, **kwargs: Any) -> str:
472+
if isinstance(source, Link):
473+
source = source.href
474+
if self._is_blob_uri(source):
475+
blob_client = self._get_blob_client(source)
476+
obj = blob_client.download_blob().readall().decode()
477+
return obj
478+
else:
479+
return super().read_text(source, *args, **kwargs)
480+
481+
def write_text(
482+
self, dest: Union[str, Link], txt: str, *args: Any, **kwargs: Any
483+
) -> None:
484+
"""Write STAC Objects to Blob storage. Note: overwrites by default."""
485+
if isinstance(dest, Link):
486+
dest = dest.href
487+
if self._is_blob_uri(dest):
488+
blob_client = self._get_blob_client(dest)
489+
blob_client.upload_blob(
490+
txt,
491+
overwrite=self.overwrite,
492+
content_settings=ContentSettings(content_type="application/json"),
493+
)
494+
else:
495+
super().write_text(dest, txt, *args, **kwargs)
496+
497+
498+
# set Blob storage connection string
499+
BlobStacIO.conn_str = "my-storage-connection-string"
500+
501+
# OR set Blob account URL and credential (only used when conn_str is not set)
502+
BlobStacIO.account_url = "https://myblobstorageaccount.blob.core.windows.net"
503+
BlobStacIO.credential = AzureSasCredential("my-sas-token")
504+
505+
# modify overwrite behavior
506+
BlobStacIO.overwrite = False
507+
508+
# set BlobStacIO as default StacIO
509+
StacIO.set_default(BlobStacIO)
364510
365511
If you only need to customize read operations you can inherit from
366512
:class:`~pystac.stac_io.DefaultStacIO` and only overwrite the read method. For example,

0 commit comments

Comments
 (0)