Skip to content

Commit d8ac4f9

Browse files
authored
feat: add iter_search (#151)
cc @betolink Closes #148
1 parent 22515f7 commit d8ac4f9

File tree

7 files changed

+175
-4
lines changed

7 files changed

+175
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
99

1010
- `type` field to geoparquet writes ([#136](https://github.com/stac-utils/rustac-py/pull/136), <https://github.com/stac-utils/rustac/pull/736>)
1111
- `parquet_compression` argument to `write` and `search_to` ([#150](https://github.com/stac-utils/rustac-py/pull/150))
12+
- `iter_search` ([#151](https://github.com/stac-utils/rustac-py/pull/151))
1213

1314
### Fixed
1415

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ pyo3-log = "0.12.1"
4444
tracing = "0.1.41"
4545
pyo3-object_store = "0.2.0"
4646
parquet = "55.1.0"
47+
futures-core = "0.3.31"
48+
futures-util = "0.3.31"
4749

4850
[build-dependencies]
4951
cargo-lock = "10"

python/rustac/rustac.pyi

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ async def search(
284284
**kwargs: str,
285285
) -> list[dict[str, Any]]:
286286
"""
287-
Searches a STAC API server.
287+
Searches a STAC API server or a stac-geoparquet file.
288288
289289
Args:
290290
href: The STAC API to search.
@@ -333,6 +333,70 @@ async def search(
333333
... )
334334
"""
335335

336+
async def iter_search(
337+
href: str,
338+
*,
339+
intersects: str | dict[str, Any] | None = None,
340+
ids: str | list[str] | None = None,
341+
collections: str | list[str] | None = None,
342+
max_items: int | None = None,
343+
limit: int | None = None,
344+
bbox: list[float] | None = None,
345+
datetime: str | None = None,
346+
include: str | list[str] | None = None,
347+
exclude: str | list[str] | None = None,
348+
sortby: str | list[str | dict[str, str]] | None = None,
349+
filter: str | dict[str, Any] | None = None,
350+
query: dict[str, Any] | None = None,
351+
use_duckdb: bool | None = None,
352+
**kwargs: str,
353+
) -> AsyncIterator[dict[str, Any]]:
354+
"""
355+
Searches a STAC API server and iterates over its items.
356+
357+
Args:
358+
href: The STAC API to search.
359+
intersects: Searches items
360+
by performing intersection between their geometry and provided GeoJSON
361+
geometry.
362+
ids: Array of Item ids to return.
363+
collections: Array of one or more Collection IDs that
364+
each matching Item must be in.
365+
limit: The page size returned from the server.
366+
bbox: Requested bounding box.
367+
datetime: Single date+time, or a range (`/` separator),
368+
formatted to RFC 3339, section 5.6. Use double dots .. for open
369+
date ranges.
370+
include: fields to include in the response (see [the
371+
extension
372+
docs](https://github.com/stac-api-extensions/fields?tab=readme-ov-file#includeexclude-semantics))
373+
for more on the semantics).
374+
exclude: fields to exclude from the response (see [the
375+
extension
376+
docs](https://github.com/stac-api-extensions/fields?tab=readme-ov-file#includeexclude-semantics))
377+
for more on the semantics).
378+
sortby: Fields by which to sort results (use `-field` to sort descending).
379+
filter: CQL2 filter expression. Strings
380+
will be interpreted as cql2-text, dictionaries as cql2-json.
381+
query: Additional filtering based on properties.
382+
It is recommended to use filter instead, if possible.
383+
kwargs: Additional parameters to pass in to the search.
384+
385+
Returns:
386+
An iterator over STAC items
387+
388+
Examples:
389+
>>> search = await rustac.iter_search(
390+
... "https://landsatlook.usgs.gov/stac-server",
391+
... collections=["landsat-c2l2-sr"],
392+
... intersects={"type": "Point", "coordinates": [-105.119, 40.173]},
393+
... sortby="-properties.datetime",
394+
... )
395+
>>> async for item in search:
396+
... items.append(item)
397+
...
398+
"""
399+
336400
async def search_to(
337401
outfile: str,
338402
href: str,

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ fn rustac(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
3434
)?)?;
3535
m.add_function(wrap_pyfunction!(migrate::migrate, m)?)?;
3636
m.add_function(wrap_pyfunction!(read::read, m)?)?;
37+
m.add_function(wrap_pyfunction!(search::iter_search, m)?)?;
3738
m.add_function(wrap_pyfunction!(search::search, m)?)?;
3839
m.add_function(wrap_pyfunction!(search::search_to, m)?)?;
3940
m.add_function(wrap_pyfunction!(version::sha, m)?)?;

src/search.rs

Lines changed: 95 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,79 @@
11
use crate::{Error, Json, Result};
2+
use futures_core::Stream;
3+
use futures_core::stream::BoxStream;
4+
use futures_util::StreamExt;
25
use geojson::Geometry;
36
use pyo3::prelude::*;
47
use pyo3::{Bound, FromPyObject, PyErr, PyResult, exceptions::PyValueError, types::PyDict};
58
use pyo3_object_store::AnyObjectStore;
9+
use serde_json::{Map, Value};
610
use stac::Bbox;
7-
use stac_api::{Fields, Filter, Items, Search, Sortby};
11+
use stac_api::{Client, Fields, Filter, Items, Search, Sortby};
812
use stac_io::{Format, StacStore};
13+
use std::sync::Arc;
14+
use tokio::{pin, sync::Mutex};
15+
16+
#[pyclass]
17+
struct SearchIterator(Arc<Mutex<BoxStream<'static, stac_api::Result<Map<String, Value>>>>>);
18+
19+
#[pymethods]
20+
impl SearchIterator {
21+
fn __aiter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
22+
slf
23+
}
24+
25+
fn __anext__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
26+
let stream = self.0.clone();
27+
pyo3_async_runtimes::tokio::future_into_py(py, async move {
28+
let mut stream = stream.lock().await;
29+
if let Some(result) = stream.next().await {
30+
let item = result.map_err(Error::from)?;
31+
Ok(Some(Json(item)))
32+
} else {
33+
Ok(None)
34+
}
35+
})
36+
}
37+
}
38+
39+
#[pyfunction]
40+
#[pyo3(signature = (href, *, intersects=None, ids=None, collections=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, **kwargs))]
41+
#[allow(clippy::too_many_arguments)]
42+
pub fn iter_search<'py>(
43+
py: Python<'py>,
44+
href: String,
45+
intersects: Option<StringOrDict>,
46+
ids: Option<StringOrList>,
47+
collections: Option<StringOrList>,
48+
limit: Option<u64>,
49+
bbox: Option<Vec<f64>>,
50+
datetime: Option<String>,
51+
include: Option<StringOrList>,
52+
exclude: Option<StringOrList>,
53+
sortby: Option<PySortby<'py>>,
54+
filter: Option<StringOrDict>,
55+
query: Option<Bound<'py, PyDict>>,
56+
kwargs: Option<Bound<'_, PyDict>>,
57+
) -> PyResult<Bound<'py, PyAny>> {
58+
let search = build(
59+
intersects,
60+
ids,
61+
collections,
62+
limit,
63+
bbox,
64+
datetime,
65+
include,
66+
exclude,
67+
sortby,
68+
filter,
69+
query,
70+
kwargs,
71+
)?;
72+
pyo3_async_runtimes::tokio::future_into_py(py, async move {
73+
let stream = iter_search_api(href, search).await?;
74+
Ok(SearchIterator(Arc::new(Mutex::new(Box::pin(stream)))))
75+
})
76+
}
977

1078
#[pyfunction]
1179
#[pyo3(signature = (href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, use_duckdb=None, **kwargs))]
@@ -165,8 +233,32 @@ async fn search_api(
165233
search: Search,
166234
max_items: Option<usize>,
167235
) -> Result<stac_api::ItemCollection> {
168-
let value = stac_api::client::search(&href, search, max_items).await?;
169-
Ok(value)
236+
let stream = iter_search_api(href, search).await?;
237+
pin!(stream);
238+
let mut items = if let Some(max_items) = max_items {
239+
Vec::with_capacity(max_items)
240+
} else {
241+
Vec::new()
242+
};
243+
while let Some(result) = stream.next().await {
244+
let item = result?;
245+
items.push(item);
246+
if let Some(max_items) = max_items {
247+
if items.len() >= max_items {
248+
break;
249+
}
250+
}
251+
}
252+
Ok(items.into())
253+
}
254+
255+
async fn iter_search_api(
256+
href: String,
257+
search: Search,
258+
) -> Result<impl Stream<Item = stac_api::Result<Map<String, Value>>>> {
259+
let client = Client::new(&href)?;
260+
let stream = client.search(search).await?;
261+
Ok(stream)
170262
}
171263

172264
/// Creates a [Search] from Python arguments.

tests/test_search.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,12 @@ async def test_cql(data: Path) -> None:
113113
},
114114
max_items=1,
115115
)
116+
117+
118+
async def test_iter_search() -> None:
    """Iterates a live STAC API search and stops after the first ten items."""
    items = []
    search = await rustac.iter_search("https://landsatlook.usgs.gov/stac-server")
    async for item in search:
        items.append(item)
        if len(items) >= 10:
            break
    # Without an assertion the test would pass no matter what the iterator yields.
    assert len(items) == 10

0 commit comments

Comments
 (0)