Skip to content

Commit a1f87ef

Browse files
authored
Merge pull request #80 from scaleapi/da-escape-chars
Escape Characters
2 parents eaa1e87 + 7fc9fec commit a1f87ef

File tree

6 files changed

+46
-3
lines changed

6 files changed

+46
-3
lines changed

nucleus/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import json
5555
import logging
5656
import os
57+
import urllib.request
5758
from typing import Any, Dict, List, Optional, Union
5859

5960
import aiohttp
@@ -62,6 +63,8 @@
6263
import tqdm
6364
import tqdm.notebook as tqdm_notebook
6465

66+
from nucleus.url_utils import sanitize_string_args
67+
6568
from .annotation import (
6669
BoxAnnotation,
6770
PolygonAnnotation,
@@ -300,6 +303,7 @@ def delete_dataset(self, dataset_id: str) -> dict:
300303
"""
301304
return self.make_request({}, f"dataset/{dataset_id}", requests.delete)
302305

306+
@sanitize_string_args
303307
def delete_dataset_item(
304308
self, dataset_id: str, item_id: str = None, reference_id: str = None
305309
) -> dict:
@@ -862,6 +866,7 @@ def model_run_info(self, model_run_id: str):
862866
{}, f"modelRun/{model_run_id}/info", requests.get
863867
)
864868

869+
@sanitize_string_args
865870
def dataitem_ref_id(self, dataset_id: str, reference_id: str):
866871
"""
867872
:param dataset_id: internally controlled dataset id
@@ -872,6 +877,7 @@ def dataitem_ref_id(self, dataset_id: str, reference_id: str):
872877
{}, f"dataset/{dataset_id}/refloc/{reference_id}", requests.get
873878
)
874879

880+
@sanitize_string_args
875881
def predictions_ref_id(self, model_run_id: str, ref_id: str):
876882
"""
877883
Returns Model Run info For Dataset Item by model_run_id and item reference_id.

nucleus/dataset.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import requests
44

55
from nucleus.job import AsyncJob
6+
from nucleus.url_utils import sanitize_string_args
67
from nucleus.utils import (
78
convert_export_payload,
89
format_dataset_item_response,
@@ -35,7 +36,6 @@
3536
)
3637
from .payload_constructor import construct_model_run_creation_payload
3738

38-
3939
WARN_FOR_LARGE_UPLOAD = 50000
4040

4141

@@ -83,6 +83,7 @@ def size(self) -> int:
8383
def items(self) -> List[DatasetItem]:
8484
return self._client.get_dataset_items(self.id)
8585

86+
@sanitize_string_args
8687
def autotag_scores(self, autotag_name, for_scores_greater_than=0):
8788
"""Export the autotag scores above a threshold, largest scores first.
8889

nucleus/url_utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import urllib.request
2+
3+
4+
def sanitize_field(field):
    """Percent-encode ``field`` for embedding in a URL.

    The text is encoded as UTF-8 and no reserved character is treated as
    safe, so ``/`` is escaped to ``%2F``.

    :param field: the string to escape
    :return: the percent-encoded string
    """
    # ``quote`` is documented in urllib.parse; urllib.request merely happens
    # to re-export it, which is not part of that module's public API.
    from urllib.parse import quote

    return quote(field.encode("UTF-8"), safe="")
6+
7+
8+
def sanitize_string_args(function):
    """Decorator that sanitizes every string argument before calling ``function``.

    Each positional and keyword argument that is a ``str`` is passed through
    ``sanitize_field``; arguments of any other type are forwarded unchanged.

    :param function: the callable to wrap
    :return: a wrapper that keeps the name and docstring of ``function``
    """
    # Local import so this block does not rely on a module-level import.
    import functools

    # ``wraps`` copies __name__, __doc__, etc. onto the wrapper so decorated
    # methods still show their own metadata in help() and tracebacks.
    @functools.wraps(function)
    def sanitized_function(*args, **kwargs):
        sanitized_args = [
            sanitize_field(arg) if isinstance(arg, str) else arg
            for arg in args
        ]
        sanitized_kwargs = {
            key: sanitize_field(value) if isinstance(value, str) else value
            for key, value in kwargs.items()
        }
        return function(*sanitized_args, **sanitized_kwargs)

    return sanitized_function

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ exclude = '''
2121

2222
[tool.poetry]
2323
name = "scale-nucleus"
24-
version = "0.1.10"
24+
version = "0.1.11"
2525
description = "The official Python client library for Nucleus, the Data Platform for AI"
2626
license = "MIT"
2727
authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]

tests/test_dataset.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
TEST_IMG_URLS,
3838
TEST_POLYGON_ANNOTATIONS,
3939
TEST_SEGMENTATION_ANNOTATIONS,
40-
TEST_SLICE_NAME,
4140
reference_id_from_url,
4241
)
4342

@@ -344,6 +343,20 @@ def test_annotate_async_with_error(dataset: Dataset):
344343
assert "Item with id fake_garbage doesn" in str(job.errors())
345344

346345

346+
def test_append_with_special_chars(dataset):
    """Append an item whose reference id contains ``/`` and call refloc with that id."""
    slashed_reference = "test/reference/id"
    item = DatasetItem(
        image_location=TEST_IMG_URLS[0],
        reference_id=slashed_reference,
        metadata={"test": "metadata"},
    )
    dataset.append([item])
    # There is no assertion here: the test passes when neither call raises.
    dataset.refloc(slashed_reference)
358+
359+
347360
def test_append_and_export(dataset):
348361
# Dataset upload
349362
url = TEST_IMG_URLS[0]

tests/test_slice.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ def sort_by_reference_id(items):
154154
)
155155

156156

157+
@pytest.mark.integration
157158
def test_slice_send_to_labeling(dataset):
158159
# Dataset upload
159160
ds_items = []

0 commit comments

Comments
 (0)