From e41ba44dcf4867f8b4afbf2e5fe677e3360bfdb9 Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Mon, 27 Jan 2025 19:15:54 +0000 Subject: [PATCH 1/9] Use pydantic-settings for BaseSettings as currently recommended by pydantic https://docs.pydantic.dev/latest/concepts/pydantic_settings/ --- src/pypgstac/src/pypgstac/db.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/pypgstac/src/pypgstac/db.py b/src/pypgstac/src/pypgstac/db.py index 001ec55b..3a80213b 100644 --- a/src/pypgstac/src/pypgstac/db.py +++ b/src/pypgstac/src/pypgstac/db.py @@ -10,12 +10,7 @@ from psycopg import Connection, sql from psycopg.types.json import set_json_dumps, set_json_loads from psycopg_pool import ConnectionPool - -try: - from pydantic.v1 import BaseSettings # type:ignore -except ImportError: - from pydantic import BaseSettings # type:ignore - +from pydantic_settings import BaseSettings from tenacity import retry, retry_if_exception_type, stop_after_attempt logger = logging.getLogger(__name__) From dc44e37ff7fc65c4f1542a9374de6336df23d20e Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Mon, 27 Jan 2025 19:16:28 +0000 Subject: [PATCH 2/9] Update classifiers to include Python 3.12 and 3.13 --- src/pypgstac/pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pypgstac/pyproject.toml b/src/pypgstac/pyproject.toml index 6fb97a13..7bb8bb90 100644 --- a/src/pypgstac/pyproject.toml +++ b/src/pypgstac/pyproject.toml @@ -18,6 +18,8 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ] dependencies = [ "cachetools==5.3.*", From 29bced8092e050da2e697b2cbd8a8059cfecb471 Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Mon, 27 Jan 2025 19:18:36 +0000 Subject: [PATCH 3/9] Loosen dependencies requirements #341 Also remove unused packages: flake8, types-setuptools --- src/pypgstac/pyproject.toml | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/pypgstac/pyproject.toml b/src/pypgstac/pyproject.toml index 7bb8bb90..387aeff2 100644 --- a/src/pypgstac/pyproject.toml +++ b/src/pypgstac/pyproject.toml @@ -22,36 +22,34 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - "cachetools==5.3.*", - "fire==0.4.*", - "hydraters==0.1.*", - "orjson>=3.6.2", - "plpygis==0.2.*", - "pydantic>=1.7", - "python-dateutil==2.8.*", - "smart-open>=4.2", - "tenacity==8.1.*", - "version-parser>= 1.0.1", + "cachetools>=5.0.0", + "fire>=0.4.0", + "hydraters>=0.1.0", + "orjson>=3.6.0", + "plpygis>=0.2.0", + "pydantic-settings>=2.0.0", + "python-dateutil>=2.8.0", + "smart-open>=4.2.0", + "tenacity>=8.0.0", + "version-parser>=1.0.0", ] [project.optional-dependencies] test = [ "pytest", "pytest-cov", - "pystac[validation]==1.*", + "pystac[validation]>=1.0.0", "types-cachetools", ] dev = [ - "flake8==7.1.1", - "black>=24.10.0", - "mypy>=1.13.0", - "types-setuptools", - "ruff==0.8.2", + "black", + "mypy", + "ruff", "pre-commit", ] psycopg = [ - "psycopg[binary]==3.1.*", - "psycopg-pool==3.1.*", + "psycopg[binary]>=3.1", + "psycopg-pool>=3.1", ] migrations = [ "psycopg2-binary", From 92cbae868e62b44b72b581d71d9ec141fa50756c Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Mon, 27 Jan 2025 19:25:09 +0000 Subject: [PATCH 4/9] psycopg since 3.1.9 accepts orjson.dumps without wrapper method --- src/pypgstac/pyproject.toml | 2 +- src/pypgstac/src/pypgstac/db.py | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/pypgstac/pyproject.toml b/src/pypgstac/pyproject.toml index 387aeff2..dd0dd254 100644 --- a/src/pypgstac/pyproject.toml +++ b/src/pypgstac/pyproject.toml @@ -48,7 +48,7 @@ dev = [ "pre-commit", ] psycopg = [ - "psycopg[binary]>=3.1", + "psycopg[binary]>=3.1.9", "psycopg-pool>=3.1", ] migrations = [ diff --git a/src/pypgstac/src/pypgstac/db.py b/src/pypgstac/src/pypgstac/db.py index 3a80213b..51d97a94 100644 --- a/src/pypgstac/src/pypgstac/db.py +++ b/src/pypgstac/src/pypgstac/db.py @@ -16,12 +16,7 @@ logger = logging.getLogger(__name__) -def dumps(data: dict) -> str: - """Dump dictionary as string.""" - return orjson.dumps(data).decode() - - -set_json_dumps(dumps) +set_json_dumps(orjson.dumps) set_json_loads(orjson.loads) @@ -299,4 +294,4 @@ def func(self, function_name: str, *args: Any) -> Generator: def search(self, query: Union[dict, str, psycopg.types.json.Jsonb] = "{}") -> str: """Search PgSTAC.""" - return dumps(next(self.func("search", query))[0]) + return orjson.dumps(next(self.func("search", query))[0]).decode() From 8b11296e65e084c611cbde242a885459f8ffd37c Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Mon, 27 Jan 2025 20:05:45 +0000 Subject: [PATCH 5/9] Improve chunked_iterable result typing --- src/pypgstac/src/pypgstac/load.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/pypgstac/src/pypgstac/load.py b/src/pypgstac/src/pypgstac/load.py index 76d97e7e..7a4b1e80 100644 --- a/src/pypgstac/src/pypgstac/load.py +++ b/src/pypgstac/src/pypgstac/load.py @@ -19,6 +19,7 @@ Optional, TextIO, Tuple, + TypeVar, Union, ) @@ -55,7 +56,13 @@ class Partition: requires_update: bool -def chunked_iterable(iterable: Iterable, size: Optional[int] = 10000) -> Iterable: +_T = TypeVar("_T") + + +def chunked_iterable( + iterable: Iterable[_T], + size: Optional[int] = 10000, +) -> Generator[Tuple[_T, ...], None, None]: """Chunk an iterable.""" it = iter(iterable) while True: From e6b26e4dd5ea6b0c1ae3c2a32cbd580ad02c6728 Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Mon, 27 Jan 2025 20:06:50 +0000 Subject: [PATCH 6/9] Optimize open_std filename stdin check and fix typing to match optional behavior --- src/pypgstac/src/pypgstac/load.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/pypgstac/src/pypgstac/load.py b/src/pypgstac/src/pypgstac/load.py index 7a4b1e80..70073d20 100644 --- a/src/pypgstac/src/pypgstac/load.py +++ b/src/pypgstac/src/pypgstac/load.py @@ -91,19 +91,19 @@ class Methods(str, Enum): @contextlib.contextmanager def open_std( - filename: str, + filename: Optional[str], mode: str = "r", *args: Any, **kwargs: Any, ) -> Generator[Any, None, None]: """Open files and i/o streams transparently.""" fh: Union[TextIO, BinaryIO] - if ( - filename is None - or filename == "-" - or filename == "stdin" - or filename == "stdout" - ): + if filename in { + None, + "-", + "stdin", + "stdout", + }: stream = sys.stdin if "r" in mode else sys.stdout fh = stream.buffer if "b" in mode else stream close = False From 340b6b78b031c8f933e82be14ab78ceb4f29116a Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Mon, 27 Jan 2025 20:07:23 +0000 Subject: [PATCH 7/9] Raise TypeError instead of silent return --- src/pypgstac/src/pypgstac/load.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pypgstac/src/pypgstac/load.py b/src/pypgstac/src/pypgstac/load.py index 70073d20..7871d42f 100644 --- a/src/pypgstac/src/pypgstac/load.py +++ b/src/pypgstac/src/pypgstac/load.py @@ -153,6 +153,8 @@ def read_json(file: Union[Path, str, Iterator[Any]] = "stdin") -> Iterable: yield line else: yield orjson.loads(line) + else: + raise TypeError(f"Unsupported read json from file of type {type(file)}") class Loader: @@ -588,6 +590,10 @@ def read_dehydrated(self, file: Union[Path, str] = "stdin") -> Generator: item[field] = tab_split[i] item["partition"] = self._partition_update(item) yield item + else: + raise TypeError( + f"Unsupported read dehydrated from file of type {type(file)}", + ) def read_hydrated( self, From f5f134b4fd54ff26a0cacb5cb35c3b8f0ac8069c Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Mon, 27 Jan 2025 20:08:11 +0000 Subject: [PATCH 8/9] Remove redundant open_std assign --- src/pypgstac/src/pypgstac/load.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/pypgstac/src/pypgstac/load.py b/src/pypgstac/src/pypgstac/load.py index 7871d42f..bc3073f4 100644 --- a/src/pypgstac/src/pypgstac/load.py +++ b/src/pypgstac/src/pypgstac/load.py @@ -126,8 +126,7 @@ def read_json(file: Union[Path, str, Iterator[Any]] = "stdin") -> Iterable: if file is None: file = "stdin" if isinstance(file, str): - open_file: Any = open_std(file, "r") - with open_file as f: + with open_std(file, "r") as f: # Try reading line by line as ndjson try: for line in f: @@ -561,8 +560,7 @@ def read_dehydrated(self, file: Union[Path, str] = "stdin") -> Generator: if file is None: file = "stdin" if isinstance(file, str): - open_file: Any = open_std(file, "r") - with open_file as f: + with open_std(file, "r") as f: # Note: if 'content' is changed to be anything # but the last field, the logic below will break. fields = [ From f879731774f0cd07b0769a5f053e13d177d5fc26 Mon Sep 17 00:00:00 2001 From: Kamil Monicz Date: Mon, 27 Jan 2025 20:09:37 +0000 Subject: [PATCH 9/9] Improve typing of read_json file and create common type alias Path type was never supported so also remove false type --- src/pypgstac/src/pypgstac/load.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/pypgstac/src/pypgstac/load.py b/src/pypgstac/src/pypgstac/load.py index bc3073f4..5b961a8a 100644 --- a/src/pypgstac/src/pypgstac/load.py +++ b/src/pypgstac/src/pypgstac/load.py @@ -8,14 +8,12 @@ from dataclasses import dataclass from datetime import datetime from enum import Enum -from pathlib import Path from typing import ( Any, BinaryIO, Dict, Generator, Iterable, - Iterator, Optional, TextIO, Tuple, @@ -121,7 +119,10 @@ def open_std( pass -def read_json(file: Union[Path, str, Iterator[Any]] = "stdin") -> Iterable: +_ReadJsonFileType = Union[str, Iterable[Union[Dict, bytes, bytearray, memoryview, str]]] + + +def read_json(file: _ReadJsonFileType = "stdin") -> Generator[Any, None, None]: """Load data from an ndjson or json file.""" if file is None: file = "stdin" @@ -205,7 +206,7 @@ def collection_json(self, collection_id: str) -> Tuple[Dict[str, Any], int, str] def load_collections( self, - file: Union[Path, str, Iterator[Any]] = "stdin", + file: _ReadJsonFileType = "stdin", insert_mode: Optional[Methods] = Methods.insert, ) -> None: """Load a collections json or ndjson file.""" @@ -556,7 +557,10 @@ def _partition_update(self, item: Dict[str, Any]) -> str: return partition_name - def read_dehydrated(self, file: Union[Path, str] = "stdin") -> Generator: + def read_dehydrated( + self, + file: str = "stdin", + ) -> Generator[Dict[str, Any], None, None]: if file is None: file = "stdin" if isinstance(file, str): @@ -595,8 +599,8 @@ def read_dehydrated(self, file: Union[Path, str] = "stdin") -> Generator: def read_hydrated( self, - file: Union[Path, str, Iterator[Any]] = "stdin", - ) -> Generator: + file: _ReadJsonFileType = "stdin", + ) -> Generator[Dict[str, Any], None, None]: for line in read_json(file): item = self.format_item(line) item["partition"] = self._partition_update(item) @@ -604,7 +608,7 @@ def read_hydrated( def load_items( self, - file: Union[Path, str, Iterator[Any]] = "stdin", + file: _ReadJsonFileType = "stdin", insert_mode: Optional[Methods] = Methods.insert, dehydrated: Optional[bool] = False, chunksize: Optional[int] = 10000, @@ -630,7 +634,7 @@ def load_items( logger.debug(f"Adding data to database took {time.perf_counter() - t} seconds.") - def format_item(self, _item: Union[Path, str, Dict[str, Any]]) -> Dict[str, Any]: + def format_item(self, _item: Union[str, Dict[str, Any]]) -> Dict[str, Any]: """Format an item to insert into a record.""" out: Dict[str, Any] = {} item: Dict[str, Any]