Merge pull request #80 from scaleapi/da-escape-chars

ardila · web-flow · commit a1f87ef3909d · 2021-06-28T12:25:46.000-07:00
Escape Characters
diff --git a/nucleus/__init__.py b/nucleus/__init__.py
@@ -54,6 +54,7 @@
 import json
 import logging
 import os
+import urllib.request
 from typing import Any, Dict, List, Optional, Union
 
 import aiohttp
@@ -62,6 +63,8 @@
 import tqdm
 import tqdm.notebook as tqdm_notebook
 
+from nucleus.url_utils import sanitize_string_args
+
 from .annotation import (
     BoxAnnotation,
     PolygonAnnotation,
@@ -300,6 +303,7 @@ def delete_dataset(self, dataset_id: str) -> dict:
         """
         return self.make_request({}, f"dataset/{dataset_id}", requests.delete)
 
+    @sanitize_string_args
     def delete_dataset_item(
         self, dataset_id: str, item_id: str = None, reference_id: str = None
     ) -> dict:
@@ -862,6 +866,7 @@ def model_run_info(self, model_run_id: str):
             {}, f"modelRun/{model_run_id}/info", requests.get
         )
 
+    @sanitize_string_args
     def dataitem_ref_id(self, dataset_id: str, reference_id: str):
         """
         :param dataset_id: internally controlled dataset id
@@ -872,6 +877,7 @@ def dataitem_ref_id(self, dataset_id: str, reference_id: str):
             {}, f"dataset/{dataset_id}/refloc/{reference_id}", requests.get
         )
 
+    @sanitize_string_args
     def predictions_ref_id(self, model_run_id: str, ref_id: str):
         """
         Returns Model Run info For Dataset Item by model_run_id and item reference_id.
diff --git a/nucleus/dataset.py b/nucleus/dataset.py
@@ -3,6 +3,7 @@
 import requests
 
 from nucleus.job import AsyncJob
+from nucleus.url_utils import sanitize_string_args
 from nucleus.utils import (
     convert_export_payload,
     format_dataset_item_response,
@@ -35,7 +36,6 @@
 )
 from .payload_constructor import construct_model_run_creation_payload
 
-
 WARN_FOR_LARGE_UPLOAD = 50000
 
 
@@ -83,6 +83,7 @@ def size(self) -> int:
     def items(self) -> List[DatasetItem]:
         return self._client.get_dataset_items(self.id)
 
+    @sanitize_string_args
     def autotag_scores(self, autotag_name, for_scores_greater_than=0):
         """Export the autotag scores above a threshold, largest scores first.
 
diff --git a/nucleus/url_utils.py b/nucleus/url_utils.py
@@ -0,0 +1,22 @@
+import functools
+import urllib.parse
+import urllib.request
+
+
+def sanitize_field(field):
+    return urllib.request.quote(field.encode("UTF-8"), safe="")
+
+
+def sanitize_string_args(function):
+    def sanitized_function(*args, **kwargs):
+        sanitized_args = []
+        sanitized_kwargs = {}
+        for arg in args:
+            if isinstance(arg, str):
+                arg = sanitize_field(arg)
+            sanitized_args.append(arg)
+        for key, value in kwargs.items():
+            if isinstance(value, str):
+                value = sanitize_field(value)
+            sanitized_kwargs[key] = value
+        return function(*sanitized_args, **sanitized_kwargs)
+
+    return sanitized_function
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,7 +21,7 @@ exclude = '''
 
 [tool.poetry]
 name = "scale-nucleus"
-version = "0.1.10"
+version = "0.1.11"
 description = "The official Python client library for Nucleus, the Data Platform for AI"
 license =  "MIT"
 authors = ["Scale AI Nucleus Team <nucleusapi@scaleapi.com>"]
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
@@ -37,7 +37,6 @@
     TEST_IMG_URLS,
     TEST_POLYGON_ANNOTATIONS,
     TEST_SEGMENTATION_ANNOTATIONS,
-    TEST_SLICE_NAME,
     reference_id_from_url,
 )
 
@@ -344,6 +343,20 @@ def test_annotate_async_with_error(dataset: Dataset):
     assert "Item with id fake_garbage doesn" in str(job.errors())
 
 
+def test_append_with_special_chars(dataset):
+    url = TEST_IMG_URLS[0]
+    ref_id = "test/reference/id"
+    ds_items = [
+        DatasetItem(
+            image_location=url,
+            reference_id=ref_id,
+            metadata={"test": "metadata"},
+        ),
+    ]
+    dataset.append(ds_items)
+    dataset.refloc(ref_id)
+
+
 def test_append_and_export(dataset):
     # Dataset upload
     url = TEST_IMG_URLS[0]
diff --git a/tests/test_slice.py b/tests/test_slice.py
@@ -154,6 +154,7 @@ def sort_by_reference_id(items):
     )
 
 
+@pytest.mark.integration
 def test_slice_send_to_labeling(dataset):
     # Dataset upload
     ds_items = []

Original file line number	Diff line number	Diff line change
`@@ -154,6 +154,7 @@ def sort_by_reference_id(items):`
`154`	`154`	`)`
`155`	`155`
`156`	`156`
	`157`	`+@pytest.mark.integration`
`157`	`158`	`def test_slice_send_to_labeling(dataset):`
`158`	`159`	`# Dataset upload`
`159`	`160`	`ds_items = []`