Skip to content

Commit 27c7dfd

Browse files
authored
add query for scenes (#400)
1 parent e812ebf commit 27c7dfd

File tree

4 files changed

+36
-2
lines changed

4 files changed

+36
-2
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

88

9+
## [0.16.3](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.3) - 2023-10-10
10+
11+
### Added
12+
- Added a `query_scenes` method on the Dataset class.
13+
- Example
14+
```python
15+
>>> ds = client.get_dataset('ds_id')
16+
>>> scenes = list(ds.query_scenes('scene.metadata.foo = "baz"'))
17+
[Scene(reference_id="", metadata={}, ...), ...]
18+
```
19+
20+
921
## [0.16.2](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.2) - 2023-10-03
1022

1123
### Fixed

nucleus/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@
8989
LABEL_KEY = "label"
9090
LABELS_KEY = "labels"
9191
MASK_URL_KEY = "mask_url"
92+
MAX_ES_PAGE_SIZE = 10000 # Max number of document fetches allowed per ES page
9293
MAX_PAYLOAD_SIZE = 0x1FFFFFE8 # Set to max string size since we currently convert payloads to strings for processing on the server-side
9394
MESSAGE_KEY = "message"
9495
METADATA_KEY = "metadata"

nucleus/dataset.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
ITEMS_KEY,
4747
JOB_REQ_LIMIT,
4848
KEEP_HISTORY_KEY,
49+
MAX_ES_PAGE_SIZE,
4950
MESSAGE_KEY,
5051
NAME_KEY,
5152
OBJECT_IDS_KEY,
@@ -1972,12 +1973,32 @@ def query_items(self, query: str) -> Iterable[DatasetItem]:
19721973
client=self._client,
19731974
endpoint=f"dataset/{self.id}/queryItemsPage",
19741975
result_key=ITEMS_KEY,
1975-
page_size=10000, # max ES page size
1976+
page_size=MAX_ES_PAGE_SIZE,
19761977
query=query,
19771978
)
19781979
for item_json in json_generator:
19791980
yield DatasetItem.from_json(item_json)
19801981

1982+
def query_scenes(self, query: str) -> Iterable[Scene]:
1983+
"""
1984+
Fetches all Scenes that pertain to a given structured query.
1985+
1986+
Args:
1987+
query: Structured query compatible with the `Nucleus query language <https://nucleus.scale.com/docs/query-language-reference>`_.
1988+
1989+
Returns:
1990+
A generator that lazily yields each Scene matching the query.
1991+
"""
1992+
json_generator = paginate_generator(
1993+
client=self._client,
1994+
endpoint=f"dataset/{self.id}/queryScenesPage",
1995+
result_key=ITEMS_KEY,
1996+
page_size=MAX_ES_PAGE_SIZE,
1997+
query=query,
1998+
)
1999+
for item_json in json_generator:
2000+
yield Scene.from_json(item_json, None, True)
2001+
19812002
@property
19822003
def tracks(self) -> List[Track]:
19832004
"""Tracks unique to this dataset.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"] # Easy ignore for getting it running
2525

2626
[tool.poetry]
2727
name = "scale-nucleus"
28-
version = "0.16.2"
28+
version = "0.16.3"
2929
description = "The official Python client library for Nucleus, the Data Platform for AI"
3030
license = "MIT"
3131
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

0 commit comments

Comments
 (0)