add query for scenes (#400)

jean-lucas · web-flow · commit 27c7dfd59023 · 2023-10-10T18:58:00.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+## [0.16.3](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.3) - 2023-10-10
+
+### Added
+- Added a `query_scenes` method on the Dataset class.
+- Example
+```shell
+>>> ds = client.get_dataset('ds_id')
+>>> list(ds.query_scenes('scene.metadata.foo = "baz"'))
+[Scene(reference_id="", metadata={}, ...), ...]
+```
+
+
 ## [0.16.2](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.16.2) - 2023-10-03
 
 ### Fixed
diff --git a/nucleus/constants.py b/nucleus/constants.py
@@ -89,6 +89,7 @@
 LABEL_KEY = "label"
 LABELS_KEY = "labels"
 MASK_URL_KEY = "mask_url"
+MAX_ES_PAGE_SIZE = 10000  # Max number of document fetches allowed per ES page
 MAX_PAYLOAD_SIZE = 0x1FFFFFE8  # Set to max string size since we currently convert payloads to strings for processing on the server-side
 MESSAGE_KEY = "message"
 METADATA_KEY = "metadata"
diff --git a/nucleus/dataset.py b/nucleus/dataset.py
@@ -46,6 +46,7 @@
     ITEMS_KEY,
     JOB_REQ_LIMIT,
     KEEP_HISTORY_KEY,
+    MAX_ES_PAGE_SIZE,
     MESSAGE_KEY,
     NAME_KEY,
     OBJECT_IDS_KEY,
@@ -1972,12 +1973,32 @@ def query_items(self, query: str) -> Iterable[DatasetItem]:
             client=self._client,
             endpoint=f"dataset/{self.id}/queryItemsPage",
             result_key=ITEMS_KEY,
-            page_size=10000,  # max ES page size
+            page_size=MAX_ES_PAGE_SIZE,
             query=query,
         )
         for item_json in json_generator:
             yield DatasetItem.from_json(item_json)
 
+    def query_scenes(self, query: str) -> Iterable[Scene]:
+        """
+        Yields every Scene returned for a given structured query.
+
+        Args:
+            query: Structured query compatible with the `Nucleus query language <https://nucleus.scale.com/docs/query-language-reference>`_.
+
+        Returns:
+            A generator of Scene query results (not a materialized list).
+        """
+        json_generator = paginate_generator(
+            client=self._client,
+            endpoint=f"dataset/{self.id}/queryScenesPage",
+            result_key=ITEMS_KEY,
+            page_size=MAX_ES_PAGE_SIZE,
+            query=query,
+        )
+        for item_json in json_generator:
+            yield Scene.from_json(item_json, None, True)
+
+
     @property
     def tracks(self) -> List[Track]:
         """Tracks unique to this dataset.
diff --git a/pyproject.toml b/pyproject.toml
@@ -25,7 +25,7 @@ ignore = ["E501", "E741", "E731", "F401"]  # Easy ignore for getting it running
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.16.2"
+version = "0.16.3"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license =  "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]