Skip to content

Commit 18f7ccd

Browse files
authored
add Slice raw export method (#187)
* add Slice raw export method * bump ver; update changelog * add test * isort
1 parent 6012b08 commit 18f7ccd

File tree

4 files changed

+66
-1
lines changed

4 files changed

+66
-1
lines changed

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,19 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [0.4.4](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.4.4) - 2022-01-04
8+
9+
### Added
10+
- `Slice.export_raw_items()` method that fetches accessible (signed) URLs for all items in the Slice.
11+
12+
## [0.4.3](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.4.3) - 2022-01-03
13+
14+
### Added
15+
- Improved error messages for categorization
16+
17+
### Changed
18+
- Category taxonomies are now updatable
19+
720
## [0.4.2](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.4.2) - 2021-12-16
821

922
### Added

nucleus/slice.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,29 @@ def export_embeddings(
280280
)
281281
return api_payload
282282

283+
def export_raw_items(self) -> Dict[str, List[Dict[str, object]]]:
    """Fetches accessible (signed) URLs for each item in the Slice.

    Issues a GET request to the ``slice/{id}/exportRawItems`` route and
    returns the response payload unchanged.

    Returns:
        The API payload as a dict. The exported items are listed under the
        ``"raw_dataset_items"`` key; each element describes a DatasetItem
        together with its accessible (signed) Scale URL.
        ::

            {
                "raw_dataset_items": List[{
                    "id": str,
                    "ref_id": str,
                    "metadata": Dict[str, Union[str, int]],
                    "original_url": str,
                    "scale_url": str
                }]
            }
    """
    # No request body is needed: the slice is identified by the route itself.
    api_payload = self._client.make_request(
        payload=None,
        route=f"slice/{self.id}/exportRawItems",
        requests_command=requests.get,
    )
    return api_payload
305+
283306

284307
def check_annotations_are_in_slice(
285308
annotations: List[Annotation], slice_to_check: Slice

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ exclude = '''
2121

2222
[tool.poetry]
2323
name = "scale-nucleus"
24-
version = "0.4.3"
24+
version = "0.4.4"
2525
description = "The official Python client library for Nucleus, the Data Platform for AI"
2626
license = "MIT"
2727
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

tests/test_slice.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import copy
22

33
import pytest
4+
import requests
45

56
from nucleus import BoxAnnotation, Dataset, DatasetItem, NucleusClient, Slice
67
from nucleus.constants import (
@@ -189,3 +190,31 @@ def test_slice_send_to_labeling(dataset):
189190

190191
response = slc.send_to_labeling(TEST_PROJECT_ID)
191192
assert isinstance(response, AsyncJob)
193+
194+
195+
def test_slice_export_raw_items(dataset):
    """Check that a Slice's raw export URL serves the originally uploaded bytes.

    Uploads a single image item, puts it in a fresh slice, exports the
    slice's raw items, and compares the content behind the exported
    ``scale_url`` with the content behind the original URL.
    """
    # Dataset upload
    orig_url = TEST_IMG_URLS[0]
    ds_items = [
        DatasetItem(
            image_location=orig_url,
            reference_id=reference_id_from_url(orig_url),
        )
    ]
    response = dataset.append(ds_items)
    assert ERROR_PAYLOAD not in response.json()

    # Slice creation
    slc = dataset.create_slice(
        name=(TEST_SLICE_NAME + "-raw-export"),
        reference_ids=[ds_items[0].reference_id],
    )

    # Export single raw item
    res = slc.export_raw_items()
    export_url = res["raw_dataset_items"][0]["scale_url"]

    # Fail loudly on HTTP errors: otherwise two identical error bodies
    # (e.g. the same 403 page) would compare equal and the test would pass.
    orig_response = requests.get(orig_url)
    orig_response.raise_for_status()
    export_response = requests.get(export_url)
    export_response.raise_for_status()

    # Compare the payloads directly; comparing hash() values is weaker
    # (collisions) and hides the actual difference on failure.
    assert orig_response.content == export_response.content

0 commit comments

Comments
 (0)