
Commit 55f4d7b

Merge pull request #264 from Labelbox/develop
3.2.0
2 parents 685b23e + c4b6b46

File tree

15 files changed: +290 −135 lines changed


CHANGELOG.md

Lines changed: 11 additions & 0 deletions
@@ -1,5 +1,16 @@
 # Changelog
 
+# Version 3.2.0 (2021-08-26)
+## Added
+* List `BulkImportRequest`s for a project with `Project.bulk_import_requests()`
+* Improvements to `Dataset.create_data_rows()`
+    * Add attachments when bulk importing data rows
+    * Provide external ids when creating data rows from local files
+    * Get more informative error messages when the api rejects an import
+
+## Fix
+* Bug causing `project.label_generator()` to fail when projects had benchmarks
+
 # Version 3.1.0 (2021-08-18)
 ## Added
 * Support for new HTML attachment type
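
A minimal sketch of the 3.2.0 additions in use. This is illustrative, not from the commit: the API key, uids, file path, and external id below are placeholders, and it assumes `TEXT` is among the supported attachment types.

```python
from labelbox import Client

client = Client(api_key="<LABELBOX_API_KEY>")   # placeholder key
project = client.get_project("<project_uid>")   # placeholder uid
dataset = client.get_dataset("<dataset_uid>")   # placeholder uid

# New: list a project's bulk import requests, oldest first.
for bulk_import_request in project.bulk_import_requests():
    print(bulk_import_request.name, bulk_import_request.state)

# New: local files can carry an external id and attachments.
task = dataset.create_data_rows([{
    "row_data": "/path/to/file1.jpg",   # local path, uploaded automatically
    "external_id": "img-1",             # previously unsupported for local files
    "attachments": [{"type": "TEXT", "value": "a note shown with the asset"}],
}])
task.wait_till_done()
```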

labelbox/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 name = "labelbox"
-__version__ = "3.1.0"
+__version__ = "3.2.0"
 
 from labelbox.schema.project import Project
 from labelbox.client import Client

labelbox/data/serialization/labelbox_v1/label.py

Lines changed: 1 addition & 1 deletion
@@ -131,7 +131,7 @@ class LBV1Label(BaseModel):
     seconds_to_label: Optional[float] = Extra('Seconds to Label')
     agreement: Optional[float] = Extra('Agreement')
     benchmark_agreement: Optional[float] = Extra('Benchmark Agreement')
-    benchmark_id: Optional[float] = Extra('Benchmark ID')
+    benchmark_id: Optional[str] = Extra('Benchmark ID')
     dataset_name: Optional[str] = Extra('Dataset Name')
     reviews: Optional[List[Review]] = Extra('Reviews')
     label_url: Optional[str] = Extra('View Label')
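
This one-character type change is the changelog's benchmark fix: benchmark ids are string ids, so pydantic's `float` coercion raised whenever a label carried a `Benchmark ID`, which broke `project.label_generator()` on projects with benchmarks. A minimal reproduction of the failure mode, using pydantic directly with hypothetical model names:

```python
from typing import Optional
from pydantic import BaseModel, ValidationError

class Before(BaseModel):          # the 3.1.0 annotation
    benchmark_id: Optional[float] = None

class After(BaseModel):           # the 3.2.0 annotation
    benchmark_id: Optional[str] = None

try:
    Before(benchmark_id="ckabc123")   # string id cannot coerce to float
except ValidationError as err:
    print("old type fails:", err)

print(After(benchmark_id="ckabc123").benchmark_id)  # parses cleanly
```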

labelbox/schema/asset_attachment.py

Lines changed: 18 additions & 0 deletions
@@ -1,4 +1,5 @@
 from enum import Enum
+from typing import Dict
 
 from labelbox.orm.db_object import DbObject
 from labelbox.orm.model import Field
@@ -24,3 +25,20 @@ class AttachmentType(Enum):
 
     attachment_type = Field.String("attachment_type", "type")
     attachment_value = Field.String("attachment_value", "value")
+
+    @classmethod
+    def validate_attachment_json(cls, attachment_json: Dict[str, str]) -> None:
+        for required_key in ['type', 'value']:
+            if required_key not in attachment_json:
+                raise ValueError(
+                    f"Must provide a `{required_key}` key for each attachment. Found {attachment_json}."
+                )
+        cls.validate_attachment_type(attachment_json['type'])
+
+    @classmethod
+    def validate_attachment_type(cls, attachment_type: str) -> None:
+        valid_types = set(cls.AttachmentType.__members__)
+        if attachment_type not in valid_types:
+            raise ValueError(
+                f"meta_type must be one of {valid_types}. Found {attachment_type}"
+            )
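
These class methods centralize checks that `DataRow.create_attachment` previously inlined (see the next file). A quick sketch of their behavior; the payloads are illustrative and assume `TEXT` is a member of `AttachmentType`:

```python
from labelbox.schema.asset_attachment import AssetAttachment

# Well-formed payloads pass silently.
AssetAttachment.validate_attachment_json({"type": "TEXT", "value": "hello"})

# A missing required key raises ValueError.
try:
    AssetAttachment.validate_attachment_json({"type": "TEXT"})
except ValueError as err:
    print(err)  # Must provide a `value` key for each attachment. ...

# An unknown type name also raises ValueError.
try:
    AssetAttachment.validate_attachment_type("GIF")
except ValueError as err:
    print(err)  # meta_type must be one of {...}. Found GIF
```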

labelbox/schema/data_row.py

Lines changed: 3 additions & 9 deletions
@@ -42,10 +42,8 @@ class DataRow(DbObject, Updateable, BulkDeletable):
     labels = Relationship.ToMany("Label", True)
     attachments = Relationship.ToMany("AssetAttachment", False, "attachments")
 
-    supported_meta_types = supported_attachment_types = {
-        attachment_type.value
-        for attachment_type in AssetAttachment.AttachmentType
-    }
+    supported_meta_types = supported_attachment_types = set(
+        AssetAttachment.AttachmentType.__members__)
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -103,11 +101,7 @@ def create_attachment(self, attachment_type, attachment_value):
         Raises:
             ValueError: asset_type must be one of the supported types.
         """
-
-        if attachment_type not in self.supported_attachment_types:
-            raise ValueError(
-                f"meta_type must be one of {self.supported_attachment_types}. Found {attachment_type}"
-            )
+        AssetAttachment.validate_attachment_type(attachment_type)
 
         attachment_type_param = "type"
         attachment_value_param = "value"
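
`create_attachment` now delegates its type check to `AssetAttachment.validate_attachment_type`, so caller-facing behavior is unchanged. A short sketch, assuming the `dataset` from the changelog example already holds at least one data row:

```python
# Grab any data row from the dataset's paginated collection.
data_row = next(iter(dataset.data_rows()))

# Valid: attaches a text note to the data row.
data_row.create_attachment("TEXT", "labeled during Q3 audit")

# Unsupported type names still raise ValueError, now from AssetAttachment.
data_row.create_attachment("NOT_A_TYPE", "...")  # raises ValueError
```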

labelbox/schema/dataset.py

Lines changed: 79 additions & 33 deletions
@@ -1,3 +1,4 @@
+from labelbox import utils
 import os
 import json
 import logging
@@ -81,13 +82,17 @@ def create_data_rows(self, items):
             is uploaded to Labelbox and a DataRow referencing it is created.
 
         If an item is a `dict`, then it could support one of the two following structures
-            1. For static imagery, video, and text it should map `DataRow` fields (or their names) to values.
-               At the minimum an `item` passed as a `dict` must contain a `DataRow.row_data` key and value.
+            1. For static imagery, video, and text it should map `DataRow` field names to values.
+               At the minimum an `item` passed as a `dict` must contain a `row_data` key and value.
+               If the value for row_data is a local file path and the path exists,
+               then the local file will be uploaded to labelbox.
+
             2. For tiled imagery the dict must match the import structure specified in the link below
                https://docs.labelbox.com/data-model/en/index-en#tiled-imagery-import
 
         >>> dataset.create_data_rows([
         >>>     {DataRow.row_data:"http://my_site.com/photos/img_01.jpg"},
+        >>>     {DataRow.row_data:"/path/to/file1.jpg"},
         >>>     "path/to/file2.jpg",
         >>>     {"tileLayerUrl" : "http://", ...}
         >>>     ])
@@ -115,64 +120,105 @@ def create_data_rows(self, items):
         DataRow = Entity.DataRow
 
         def upload_if_necessary(item):
-            if isinstance(item, str):
-                item_url = self.client.upload_file(item)
-                # Convert item from str into a dict so it gets processed
-                # like all other dicts.
-                item = {DataRow.row_data: item_url, DataRow.external_id: item}
+            row_data = item['row_data']
+            if os.path.exists(row_data):
+                item_url = self.client.upload_file(item['row_data'])
+                item = {
+                    "row_data": item_url,
+                    "external_id": item.get('external_id', item['row_data']),
+                    "attachments": item.get('attachments', [])
+                }
             return item
 
-        with ThreadPoolExecutor(file_upload_thread_count) as executor:
-            futures = [
-                executor.submit(upload_if_necessary, item) for item in items
-            ]
-            items = [future.result() for future in as_completed(futures)]
-
-        def convert_item(item):
-            # Don't make any changes to tms data
-            if "tileLayerUrl" in item:
-                return item
-            # Convert string names to fields.
-            item = {
-                key if isinstance(key, Field) else DataRow.field(key): value
-                for key, value in item.items()
-            }
+        def validate_attachments(item):
+            attachments = item.get('attachments')
+            if attachments:
+                if isinstance(attachments, list):
+                    for attachment in attachments:
+                        Entity.AssetAttachment.validate_attachment_json(
+                            attachment)
+                else:
+                    raise ValueError(
+                        f"Attachments must be a list. Found {type(attachments)}"
+                    )
+            return attachments
+
+        def format_row(item):
+            # Formats user input into a consistent dict structure
+            if isinstance(item, dict):
+                # Convert fields to strings
+                item = {
+                    key.name if isinstance(key, Field) else key: value
+                    for key, value in item.items()
+                }
+            elif isinstance(item, str):
+                # The main advantage of using a string over a dict is that the user is specifying
+                # that the file should exist locally.
+                # That info is lost after this section so we should check for it here.
+                if not os.path.exists(item):
+                    raise ValueError(f"Filepath {item} does not exist.")
+                item = {"row_data": item, "external_id": item}
+            return item
 
-            if DataRow.row_data not in item:
+        def validate_keys(item):
+            if 'row_data' not in item:
                 raise InvalidQueryError(
-                    "DataRow.row_data missing when creating DataRow.")
+                    "`row_data` missing when creating DataRow.")
 
-            invalid_keys = set(item) - set(DataRow.fields())
+            invalid_keys = set(item) - {
+                *{f.name for f in DataRow.fields()}, 'attachments'
+            }
             if invalid_keys:
                 raise InvalidAttributeError(DataRow, invalid_keys)
+            return item
+
+        def convert_item(item):
+            # Don't make any changes to tms data
+            if "tileLayerUrl" in item:
+                validate_attachments(item)
+                return item
+            # Convert all payload variations into the same dict format
+            item = format_row(item)
+            # Make sure required keys exist (and there are no extra keys)
+            validate_keys(item)
+            # Make sure attachments are valid
+            validate_attachments(item)
+            # Upload any local file paths
+            item = upload_if_necessary(item)
 
-            # Item is valid, convert it to a dict {graphql_field_name: value}
-            # Need to change the name of DataRow.row_data to "data"
             return {
-                "data" if key == DataRow.row_data else key.graphql_name: value
+                "data" if key == "row_data" else utils.camel_case(key): value
                 for key, value in item.items()
             }
 
+        if not isinstance(items, list):
+            raise ValueError(
+                f"Must pass a list to create_data_rows. Found {type(items)}")
+
+        with ThreadPoolExecutor(file_upload_thread_count) as executor:
+            futures = [executor.submit(convert_item, item) for item in items]
+            items = [future.result() for future in as_completed(futures)]
+
         # Prepare and upload the descriptor file
-        items = [convert_item(item) for item in items]
         data = json.dumps(items)
         descriptor_url = self.client.upload_data(data)
-
         # Create data source
         dataset_param = "datasetId"
         url_param = "jsonUrl"
         query_str = """mutation AppendRowsToDatasetPyApi($%s: ID!, $%s: String!){
             appendRowsToDataset(data:{datasetId: $%s, jsonFileUrl: $%s}
-            ){ taskId accepted } } """ % (dataset_param, url_param,
-                                          dataset_param, url_param)
+            ){ taskId accepted errorMessage } } """ % (dataset_param, url_param,
+                                                       dataset_param, url_param)
+
         res = self.client.execute(query_str, {
             dataset_param: self.uid,
             url_param: descriptor_url
         })
         res = res["appendRowsToDataset"]
         if not res["accepted"]:
+            msg = res['errorMessage']
             raise InvalidQueryError(
-                "Server did not accept DataRow creation request")
+                f"Server did not accept DataRow creation request. {msg}")
 
         # Fetch and return the task.
         task_id = res["taskId"]
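
The refactor splits the old `convert_item` into `format_row`, `validate_keys`, `validate_attachments`, and `upload_if_necessary`, and runs the whole pipeline in the thread pool instead of only the uploads. A sketch of the payload shapes it now accepts; paths and URLs are placeholders:

```python
task = dataset.create_data_rows([
    # Plain string: must be an existing local path; it doubles as external_id.
    "/path/to/file2.jpg",
    # Dict with a hosted URL: passed through unchanged.
    {"row_data": "http://my_site.com/photos/img_01.jpg"},
    # Dict with a local path: uploaded, keeping external_id and attachments.
    {
        "row_data": "/path/to/file1.jpg",
        "external_id": "img-1",
        "attachments": [{"type": "TEXT", "value": "note"}],
    },
])
task.wait_till_done()

# Non-list payloads are now rejected up front:
# dataset.create_data_rows("single_item.jpg")  # ValueError: Must pass a list ...
```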

labelbox/schema/project.py

Lines changed: 20 additions & 0 deletions
@@ -582,6 +582,26 @@ def enable_model_assisted_labeling(self, toggle: bool = True) -> bool:
         return res["project"]["showPredictionsToLabelers"][
             "showingPredictionsToLabelers"]
 
+    def bulk_import_requests(self):
+        """ Returns bulk import request objects which are used in model-assisted labeling.
+        These are returned with the oldest first, and most recent last.
+        """
+
+        id_param = "project_id"
+        query_str = """query ListAllImportRequestsPyApi($%s: ID!) {
+            bulkImportRequests (
+                where: { projectId: $%s }
+                skip: %%d
+                first: %%d
+            ) {
+                %s
+            }
+        }""" % (id_param, id_param,
+                query.results_query_part(Entity.BulkImportRequest))
+        return PaginatedCollection(self.client, query_str,
+                                   {id_param: str(self.uid)},
+                                   ["bulkImportRequests"], BulkImportRequest)
+
     def upload_annotations(
             self,
             name: str,
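
`bulk_import_requests` returns a `PaginatedCollection`, so pages are fetched lazily as you iterate; the `skip`/`first` placeholders in the query are filled in by the pagination machinery. A consumption sketch, reusing the `project` from the changelog example:

```python
# Iterate lazily; each element is a BulkImportRequest, oldest first.
for bulk_import_request in project.bulk_import_requests():
    print(bulk_import_request.name, bulk_import_request.state)

# Or materialize the full history at once.
history = list(project.bulk_import_requests())
print(f"{len(history)} import requests so far")
```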

tests/integration/bulk_import/conftest.py

Lines changed: 2 additions & 4 deletions
@@ -6,8 +6,6 @@
 from labelbox.schema.labeling_frontend import LabelingFrontend
 from labelbox.schema.annotation_import import MALPredictionImport
 
-IMG_URL = "https://picsum.photos/200/300"
-
 
 @pytest.fixture
 def ontology():
@@ -103,7 +101,7 @@ def ontology():
 
 
 @pytest.fixture
-def configured_project(client, ontology, rand_gen):
+def configured_project(client, ontology, rand_gen, image_url):
     project = client.create_project(name=rand_gen(str))
     dataset = client.create_dataset(name=rand_gen(str))
     editor = list(
@@ -112,7 +110,7 @@ def configured_project(client, ontology, rand_gen):
     project.setup(editor, ontology)
     data_row_ids = []
     for _ in range(len(ontology['tools']) + len(ontology['classifications'])):
-        data_row_ids.append(dataset.create_data_row(row_data=IMG_URL).uid)
+        data_row_ids.append(dataset.create_data_row(row_data=image_url).uid)
     project.datasets.connect(dataset)
     project.data_row_ids = data_row_ids
     yield project

tests/integration/bulk_import/test_bulk_import_request.py

Lines changed: 27 additions & 10 deletions
@@ -149,21 +149,38 @@ def assert_file_content(url: str, predictions):
     assert response.text == ndjson.dumps(predictions)
 
 
-def test_delete(client, configured_project, predictions):
+def test_project_bulk_import_requests(client, configured_project, predictions):
+    result = configured_project.bulk_import_requests()
+    assert len(list(result)) == 0
+
+    name = str(uuid.uuid4())
+    bulk_import_request = configured_project.upload_annotations(
+        name=name, annotations=predictions)
+    bulk_import_request.wait_until_done()
 
-    id_param = "project_id"
-    query_str = """query bulk_import_requestsPyApi($%s: ID!) {bulkImportRequests(where: {projectId: $%s}) {id}}""" % (
-        id_param, id_param)
+    name = str(uuid.uuid4())
+    bulk_import_request = configured_project.upload_annotations(
+        name=name, annotations=predictions)
+    bulk_import_request.wait_until_done()
+
+    name = str(uuid.uuid4())
+    bulk_import_request = configured_project.upload_annotations(
+        name=name, annotations=predictions)
+    bulk_import_request.wait_until_done()
+
+    result = configured_project.bulk_import_requests()
+    assert len(list(result)) == 3
+
+
+def test_delete(client, configured_project, predictions):
     name = str(uuid.uuid4())
 
     bulk_import_request = configured_project.upload_annotations(
         name=name, annotations=predictions)
     bulk_import_request.wait_until_done()
-    all_import_requests = client.execute(query_str,
-                                         {id_param: configured_project.uid})
-    assert len(all_import_requests['bulkImportRequests']) == 1
+    all_import_requests = configured_project.bulk_import_requests()
+    assert len(list(all_import_requests)) == 1
 
     bulk_import_request.delete()
-    all_import_requests = client.execute(query_str,
-                                         {id_param: configured_project.uid})
-    assert len(all_import_requests['bulkImportRequests']) == 0
+    all_import_requests = configured_project.bulk_import_requests()
+    assert len(list(all_import_requests)) == 0
