diff --git a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py index 7908bc242..1a78127e1 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py @@ -32,18 +32,8 @@ from .classification import Radio from .classification import Text -from .data import AudioData -from .data import ConversationData -from .data import DicomData -from .data import DocumentData -from .data import HTMLData -from .data import ImageData +from .data import GenericDataRowData from .data import MaskData -from .data import TextData -from .data import VideoData -from .data import LlmPromptResponseCreationData -from .data import LlmPromptCreationData -from .data import LlmResponseCreationData from .label import Label from .collection import LabelGenerator diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py index d90204309..2e76176a8 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py @@ -40,47 +40,6 @@ def _assign_ids(label: Label): self._fns["assign_feature_schema_ids"] = _assign_ids return self - def add_url_to_data( - self, signer: Callable[[bytes], str] - ) -> "LabelGenerator": - """ - Creates signed urls for the data - Only uploads url if one doesn't already exist. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - LabelGenerator that signs urls as data is accessed - """ - - def _add_url_to_data(label: Label): - label.add_url_to_data(signer) - return label - - self._fns["add_url_to_data"] = _add_url_to_data - return self - - def add_to_dataset( - self, dataset: "Entity.Dataset", signer: Callable[[bytes], str] - ) -> "LabelGenerator": - """ - Creates data rows from each labels data object and attaches the data to the given dataset. - Updates the label's data object to have the same external_id and uid as the data row. - - Args: - dataset: labelbox dataset object to add the new data row to - signer: A function that accepts bytes and returns a signed url. - Returns: - LabelGenerator that updates references to the new data rows as data is accessed - """ - - def _add_to_dataset(label: Label): - label.create_data_row(dataset, signer) - return label - - self._fns["assign_datarow_ids"] = _add_to_dataset - return self - def add_url_to_masks( self, signer: Callable[[bytes], str] ) -> "LabelGenerator": diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py index 2522b2741..8d5e7289b 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/__init__.py @@ -1,12 +1,2 @@ -from .audio import AudioData -from .conversation import ConversationData -from .dicom import DicomData -from .document import DocumentData -from .html import HTMLData -from .raster import ImageData from .raster import MaskData -from .text import TextData -from .video import VideoData -from .llm_prompt_response_creation import LlmPromptResponseCreationData -from .llm_prompt_creation import LlmPromptCreationData -from .llm_response_creation import LlmResponseCreationData +from .generic_data_row_data import GenericDataRowData diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py b/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py deleted file mode 100644 index 916fca99d..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/audio.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class AudioData(BaseData, _NoCoercionMixin): - class_name: Literal["AudioData"] = "AudioData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py deleted file mode 100644 index ef6507dca..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/conversation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class ConversationData(BaseData, _NoCoercionMixin): - class_name: Literal["ConversationData"] = "ConversationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py b/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py deleted file mode 100644 index ae4c377dc..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/dicom.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class DicomData(BaseData, _NoCoercionMixin): - class_name: Literal["DicomData"] = "DicomData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/document.py b/libs/labelbox/src/labelbox/data/annotation_types/data/document.py deleted file mode 100644 index 810a3ed3e..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/document.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class DocumentData(BaseData, _NoCoercionMixin): - class_name: Literal["DocumentData"] = "DocumentData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/html.py b/libs/labelbox/src/labelbox/data/annotation_types/data/html.py deleted file mode 100644 index 7a78fcb7b..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/html.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class HTMLData(BaseData, _NoCoercionMixin): - class_name: Literal["HTMLData"] = "HTMLData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py deleted file mode 100644 index a1b0450bc..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_creation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmPromptCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmPromptCreationData"] = "LlmPromptCreationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py deleted file mode 100644 index a8dfce894..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_prompt_response_creation.py +++ /dev/null @@ -1,9 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmPromptResponseCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmPromptResponseCreationData"] = ( - "LlmPromptResponseCreationData" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py b/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py deleted file mode 100644 index a8963ed3f..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/llm_response_creation.py +++ /dev/null @@ -1,7 +0,0 @@ -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin -from .base_data import BaseData - - -class LlmResponseCreationData(BaseData, _NoCoercionMixin): - class_name: Literal["LlmResponseCreationData"] = "LlmResponseCreationData" diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py index cfdc4e2f1..fc9acd50f 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/data/raster.py @@ -11,8 +11,9 @@ from requests.exceptions import ConnectTimeout from typing_extensions import Literal +from pydantic import BaseModel, model_validator, ConfigDict + from ..types import TypedArray -from .base_data import BaseData class RasterData(BaseModel, ABC): @@ -222,6 +223,3 @@ class MaskData(RasterData): url: Optional[str] = None arr: Optional[TypedArray[Literal['uint8']]] = None """ - - -class ImageData(RasterData, BaseData): ... diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py b/libs/labelbox/src/labelbox/data/annotation_types/data/text.py deleted file mode 100644 index cabad4836..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/text.py +++ /dev/null @@ -1,116 +0,0 @@ -from typing import Callable, Optional - -import requests -from google.api_core import retry -from lbox.exceptions import InternalServerError -from pydantic import ConfigDict, model_validator -from requests.exceptions import ConnectTimeout - -from labelbox.typing_imports import Literal -from labelbox.utils import _NoCoercionMixin - -from .base_data import BaseData - - -class TextData(BaseData, _NoCoercionMixin): - """ - Represents text data. Requires arg file_path, text, or url - - >>> TextData(text="") - - Args: - file_path (str) - text (str) - url (str) - """ - - class_name: Literal["TextData"] = "TextData" - file_path: Optional[str] = None - text: Optional[str] = None - url: Optional[str] = None - model_config = ConfigDict(extra="forbid") - - @property - def value(self) -> str: - """ - Property that unifies the data access pattern for all references to the text. - - Returns: - string representation of the text - """ - if self.text: - return self.text - elif self.file_path: - with open(self.file_path, "r") as file: - text = file.read() - self.text = text - return text - elif self.url: - text = self.fetch_remote() - self.text = text - return text - else: - raise ValueError("Must set either url, file_path or im_bytes") - - def set_fetch_fn(self, fn): - object.__setattr__(self, "fetch_remote", lambda: fn(self)) - - @retry.Retry( - deadline=15.0, - predicate=retry.if_exception_type(ConnectTimeout, InternalServerError), - ) - def fetch_remote(self) -> str: - """ - Method for accessing url. - - If url is not publicly accessible or requires another access pattern - simply override this function - """ - response = requests.get(self.url) - if response.status_code in [500, 502, 503, 504]: - raise InternalServerError(response.text) - response.raise_for_status() - return response.text - - @retry.Retry(deadline=15.0) - def create_url(self, signer: Callable[[bytes], str]) -> None: - """ - Utility for creating a url from any of the other text references. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - url for the text - """ - if self.url is not None: - return self.url - elif self.file_path is not None: - with open(self.file_path, "rb") as file: - self.url = signer(file.read()) - elif self.text is not None: - self.url = signer(self.text.encode()) - else: - raise ValueError( - "One of url, im_bytes, file_path, numpy must not be None." - ) - return self.url - - @model_validator(mode="after") - def validate_date(self, values): - file_path = self.file_path - text = self.text - url = self.url - uid = self.uid - global_key = self.global_key - if uid == file_path == text == url == global_key is None: - raise ValueError( - "One of `file_path`, `text`, `uid`, `global_key` or `url` required." - ) - return self - - def __repr__(self) -> str: - return ( - f"TextData(file_path={self.file_path}," - f"text={self.text[:30] + '...' if self.text is not None else None}," - f"url={self.url})" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py b/libs/labelbox/src/labelbox/data/annotation_types/data/video.py deleted file mode 100644 index 0f40911d8..000000000 --- a/libs/labelbox/src/labelbox/data/annotation_types/data/video.py +++ /dev/null @@ -1,173 +0,0 @@ -import logging -import os -import urllib.request -from typing import Callable, Dict, Generator, Optional, Tuple -from typing_extensions import Literal -from uuid import uuid4 - -import cv2 -import numpy as np -from google.api_core import retry - -from .base_data import BaseData -from ..types import TypedArray - -from pydantic import ConfigDict, model_validator - -logger = logging.getLogger(__name__) - - -class VideoData(BaseData): - """ - Represents video - """ - - file_path: Optional[str] = None - url: Optional[str] = None - frames: Optional[Dict[int, TypedArray[Literal["uint8"]]]] = None - # Required for discriminating between data types - model_config = ConfigDict(extra="forbid") - - def load_frames(self, overwrite: bool = False) -> None: - """ - Loads all frames into memory at once in order to access in non-sequential order. - This will use a lot of memory, especially for longer videos - - Args: - overwrite: Replace existing frames - """ - if self.frames and not overwrite: - return - - for count, frame in self.frame_generator(): - if self.frames is None: - self.frames = {} - self.frames[count] = frame - - @property - def value(self): - return self.frame_generator() - - def frame_generator( - self, cache_frames=False, download_dir="/tmp" - ) -> Generator[Tuple[int, np.ndarray], None, None]: - """ - A generator for accessing individual frames in a video. - - Args: - cache_frames (bool): Whether or not to cache frames while iterating through the video. - download_dir (str): Directory to save the video to. Defaults to `/tmp` dir - """ - if self.frames is not None: - for idx, frame in self.frames.items(): - yield idx, frame - return - elif self.url and not self.file_path: - file_path = os.path.join(download_dir, f"{uuid4()}.mp4") - logger.info("Downloading the video locally to %s", file_path) - self.fetch_remote(file_path) - self.file_path = file_path - - vidcap = cv2.VideoCapture(self.file_path) - - success, frame = vidcap.read() - count = 0 - if cache_frames: - self.frames = {} - while success: - frame = frame[:, :, ::-1] - yield count, frame - if cache_frames: - self.frames[count] = frame - success, frame = vidcap.read() - count += 1 - - def __getitem__(self, idx: int) -> np.ndarray: - if self.frames is None: - raise ValueError( - "Cannot select by index without iterating over the entire video or loading all frames." - ) - return self.frames[idx] - - def set_fetch_fn(self, fn): - object.__setattr__(self, "fetch_remote", lambda: fn(self)) - - @retry.Retry(deadline=15.0) - def fetch_remote(self, local_path) -> None: - """ - Method for downloading data from self.url - - If url is not publicly accessible or requires another access pattern - simply override this function - - Args: - local_path: Where to save the thing too. - """ - urllib.request.urlretrieve(self.url, local_path) - - @retry.Retry(deadline=15.0) - def create_url(self, signer: Callable[[bytes], str]) -> None: - """ - Utility for creating a url from any of the other video references. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - url for the video - """ - if self.url is not None: - return self.url - elif self.file_path is not None: - with open(self.file_path, "rb") as file: - self.url = signer(file.read()) - elif self.frames is not None: - self.file_path = self.frames_to_video(self.frames) - self.url = self.create_url(signer) - else: - raise ValueError("One of url, file_path, frames must not be None.") - return self.url - - def frames_to_video( - self, frames: Dict[int, np.ndarray], fps=20, save_dir="/tmp" - ) -> str: - """ - Compresses the data by converting a set of individual frames to a single video. - - """ - file_path = os.path.join(save_dir, f"{uuid4()}.mp4") - out = None - for key in frames.keys(): - frame = frames[key] - if out is None: - out = cv2.VideoWriter( - file_path, - cv2.VideoWriter_fourcc(*"MP4V"), - fps, - frame.shape[:2], - ) - out.write(frame) - if out is None: - return - out.release() - return file_path - - @model_validator(mode="after") - def validate_data(self): - file_path = self.file_path - url = self.url - frames = self.frames - uid = self.uid - global_key = self.global_key - - if uid == file_path == frames == url == global_key is None: - raise ValueError( - "One of `file_path`, `frames`, `uid`, `global_key` or `url` required." - ) - return self - - def __repr__(self) -> str: - return ( - f"VideoData(file_path={self.file_path}," - f"frames={'...' if self.frames is not None else None}," - f"url={self.url})" - ) diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py index 7eef43f31..8ae05f898 100644 --- a/libs/labelbox/src/labelbox/data/annotation_types/label.py +++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py @@ -3,10 +3,7 @@ import warnings import labelbox -from labelbox.data.annotation_types.data.generic_data_row_data import ( - GenericDataRowData, -) -from labelbox.data.annotation_types.data.tiled_image import TiledImageData +from labelbox.data.annotation_types.data import GenericDataRowData, MaskData from labelbox.schema import ontology from ...annotated_types import Cuid @@ -14,42 +11,13 @@ from .relationship import RelationshipAnnotation from .llm_prompt_response.prompt import PromptClassificationAnnotation from .classification import ClassificationAnswer -from .data import ( - AudioData, - ConversationData, - DicomData, - DocumentData, - HTMLData, - ImageData, - TextData, - VideoData, - LlmPromptCreationData, - LlmPromptResponseCreationData, - LlmResponseCreationData, -) from .geometry import Mask from .metrics import ScalarMetric, ConfusionMatrixMetric from .video import VideoClassificationAnnotation from .video import VideoObjectAnnotation, VideoMaskAnnotation from .mmc import MessageEvaluationTaskAnnotation from ..ontology import get_feature_schema_lookup -from pydantic import BaseModel, field_validator, model_serializer - -DataType = Union[ - VideoData, - ImageData, - TextData, - TiledImageData, - AudioData, - ConversationData, - DicomData, - DocumentData, - HTMLData, - LlmPromptCreationData, - LlmPromptResponseCreationData, - LlmResponseCreationData, - GenericDataRowData, -] +from pydantic import BaseModel, field_validator class Label(BaseModel): @@ -67,14 +35,13 @@ class Label(BaseModel): Args: uid: Optional Label Id in Labelbox - data: Data of Label, Image, Video, Text or dict with a single key uid | global_key | external_id. - Note use of classes as data is deprecated. Use GenericDataRowData or dict with a single key instead. + data: GenericDataRowData or dict with a single key uid | global_key | external_id. annotations: List of Annotations in the label extra: additional context """ uid: Optional[Cuid] = None - data: DataType + data: Union[GenericDataRowData, MaskData] annotations: List[ Union[ ClassificationAnnotation, @@ -94,13 +61,6 @@ class Label(BaseModel): def validate_data(cls, data): if isinstance(data, Dict): return GenericDataRowData(**data) - elif isinstance(data, GenericDataRowData): - return data - else: - warnings.warn( - f"Using {type(data).__name__} class for label.data is deprecated. " - "Use a dict or an instance of GenericDataRowData instead." - ) return data def object_annotations(self) -> List[ObjectAnnotation]: @@ -128,19 +88,6 @@ def frame_annotations( frame_dict[annotation.frame].append(annotation) return frame_dict - def add_url_to_data(self, signer) -> "Label": - """ - Creates signed urls for the data - Only uploads url if one doesn't already exist. - - Args: - signer: A function that accepts bytes and returns a signed url. - Returns: - Label with updated references to new data url - """ - self.data.create_url(signer) - return self - def add_url_to_masks(self, signer) -> "Label": """ Creates signed urls for all masks in the Label. diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py index 2c3215265..fedf4d91b 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py @@ -1,6 +1,6 @@ from typing import Any, Dict, List, Union, Optional -from labelbox.data.annotation_types import ImageData, TextData, VideoData +from labelbox.data.annotation_types import GenericDataRowData from labelbox.data.mixins import ( ConfidenceMixin, CustomMetric, @@ -232,7 +232,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[TextData, ImageData], + data: GenericDataRowData, message_id: str, confidence: Optional[float] = None, ) -> "NDText": @@ -264,7 +264,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, message_id: str, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, @@ -304,7 +304,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, message_id: str, confidence: Optional[float] = None, ) -> "NDRadio": @@ -427,7 +427,7 @@ def from_common( annotation: Union[ ClassificationAnnotation, VideoClassificationAnnotation ], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]: classify_obj = cls.lookup_classification(annotation) if classify_obj is None: @@ -475,7 +475,7 @@ def to_common( def from_common( cls, annotation: Union[PromptClassificationAnnotation], - data: Union[VideoData, TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]: return NDPromptText.from_common( str(annotation._uuid), diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py index 7039ae834..ffaefb4d7 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py @@ -14,7 +14,6 @@ ) from ...annotation_types.video import VideoObjectAnnotation, VideoMaskAnnotation from ...annotation_types.collection import LabelCollection, LabelGenerator -from ...annotation_types.data import DicomData, ImageData, TextData, VideoData from ...annotation_types.data.generic_data_row_data import GenericDataRowData from ...annotation_types.label import Label from ...annotation_types.ner import TextEntity, ConversationEntity @@ -214,46 +213,9 @@ def _generate_annotations( yield Label( annotations=annotations, - data=self._infer_media_type(group.data_row, annotations), + data=GenericDataRowData, ) - def _infer_media_type( - self, - data_row: DataRow, - annotations: List[ - Union[ - TextEntity, - ConversationEntity, - VideoClassificationAnnotation, - DICOMObjectAnnotation, - VideoObjectAnnotation, - ObjectAnnotation, - ClassificationAnnotation, - ScalarMetric, - ConfusionMatrixMetric, - ] - ], - ) -> Union[TextData, VideoData, ImageData]: - if len(annotations) == 0: - raise ValueError("Missing annotations while inferring media type") - - types = {type(annotation) for annotation in annotations} - data = GenericDataRowData - if (TextEntity in types) or (ConversationEntity in types): - data = TextData - elif ( - VideoClassificationAnnotation in types - or VideoObjectAnnotation in types - ): - data = VideoData - elif DICOMObjectAnnotation in types: - data = DicomData - - if data_row.id: - return data(uid=data_row.id) - else: - return data(global_key=data_row.global_key) - @staticmethod def _get_consecutive_frames( frames_indices: List[int], diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py index b28e575cf..f8b522ab5 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/metric.py @@ -1,6 +1,6 @@ from typing import Optional, Union, Type -from labelbox.data.annotation_types.data import ImageData, TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.serialization.ndjson.base import DataRow, NDJsonBase from labelbox.data.annotation_types.metrics.scalar import ( ScalarMetric, @@ -51,7 +51,7 @@ def to_common(self) -> ConfusionMatrixMetric: @classmethod def from_common( - cls, metric: ConfusionMatrixMetric, data: Union[TextData, ImageData] + cls, metric: ConfusionMatrixMetric, data: GenericDataRowData ) -> "NDConfusionMatrixMetric": return cls( uuid=metric.extra.get("uuid"), @@ -83,7 +83,7 @@ def to_common(self) -> ScalarMetric: @classmethod def from_common( - cls, metric: ScalarMetric, data: Union[TextData, ImageData] + cls, metric: ScalarMetric, data: GenericDataRowData ) -> "NDScalarMetric": return cls( uuid=metric.extra.get("uuid"), @@ -107,7 +107,7 @@ def to_common( def from_common( cls, annotation: Union[ScalarMetric, ConfusionMatrixMetric], - data: Union[TextData, ImageData], + data: GenericDataRowData, ) -> Union[NDScalarMetric, NDConfusionMatrixMetric]: obj = cls.lookup_object(annotation) return obj.from_common(annotation, data) diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py index 74d185f45..b2dcfb5b4 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/mmc.py @@ -9,6 +9,7 @@ MessageRankingTask, MessageEvaluationTaskAnnotation, ) +from ...annotation_types import GenericDataRowData class MessageTaskData(_CamelCaseMixin): @@ -35,7 +36,7 @@ def to_common(self) -> MessageEvaluationTaskAnnotation: def from_common( cls, annotation: MessageEvaluationTaskAnnotation, - data: Any, # Union[ImageData, TextData], + data: GenericDataRowData, ) -> "NDMessageTask": return cls( uuid=str(annotation._uuid), diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py index 91abface6..1bcba7a89 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Tuple, Union, Optional import base64 +from labelbox.data.annotation_types.data.raster import MaskData from labelbox.data.annotation_types.ner.conversation_entity import ( ConversationEntity, ) @@ -21,9 +22,9 @@ from PIL import Image from labelbox.data.annotation_types import feature -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData -from ...annotation_types.data import ImageData, TextData, MaskData +from ...annotation_types.data import GenericDataRowData from ...annotation_types.ner import ( DocumentEntity, DocumentTextSelection, @@ -96,7 +97,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDPoint": @@ -161,7 +162,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDLine": @@ -245,7 +246,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDPolygon": @@ -282,7 +283,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDRectangle": @@ -329,7 +330,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDRectangle": @@ -508,7 +509,7 @@ def to_common(self, name: str, feature_schema_id: Cuid): def from_common( cls, segments: List[VideoObjectAnnotation], - data: VideoData, + data: GenericDataRowData, name: str, feature_schema_id: Cuid, extra: Dict[str, Any], @@ -545,7 +546,7 @@ def to_common(self, name: str, feature_schema_id: Cuid): def from_common( cls, segments: List[DICOMObjectAnnotation], - data: VideoData, + data: GenericDataRowData, name: str, feature_schema_id: Cuid, extra: Dict[str, Any], @@ -601,7 +602,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDMask": @@ -706,7 +707,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDTextEntity": @@ -743,7 +744,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDDocumentEntity": @@ -778,7 +779,7 @@ def from_common( name: str, feature_schema_id: Cuid, extra: Dict[str, Any], - data: Union[ImageData, TextData], + data: GenericDataRowData, confidence: Optional[float] = None, custom_metrics: Optional[List[CustomMetric]] = None, ) -> "NDConversationEntity": @@ -836,7 +837,7 @@ def from_common( List[List[VideoObjectAnnotation]], VideoMaskAnnotation, ], - data: Union[ImageData, TextData], + data: GenericDataRowData, ) -> Union[ NDLine, NDPoint, diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py index 94c8e9879..d558ac244 100644 --- a/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py +++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/relationship.py @@ -1,7 +1,7 @@ from typing import Union from pydantic import BaseModel from .base import NDAnnotation, DataRow -from ...annotation_types.data import ImageData, TextData +from ...annotation_types.data import GenericDataRowData from ...annotation_types.relationship import RelationshipAnnotation from ...annotation_types.relationship import Relationship from .objects import NDObjectType @@ -40,7 +40,7 @@ def to_common( def from_common( cls, annotation: RelationshipAnnotation, - data: Union[ImageData, TextData], + data: GenericDataRowData, ) -> "NDRelationship": relationship = annotation.value return cls( diff --git a/libs/labelbox/src/labelbox/utils.py b/libs/labelbox/src/labelbox/utils.py index c76ce188f..dcf51be82 100644 --- a/libs/labelbox/src/labelbox/utils.py +++ b/libs/labelbox/src/labelbox/utils.py @@ -87,8 +87,8 @@ class _NoCoercionMixin: when serializing the object. Example: - class ConversationData(BaseData, _NoCoercionMixin): - class_name: Literal["ConversationData"] = "ConversationData" + class GenericDataRowData(BaseData, _NoCoercionMixin): + class_name: Literal["GenericDataRowData"] = "GenericDataRowData" """ diff --git a/libs/labelbox/tests/data/annotation_types/data/test_raster.py b/libs/labelbox/tests/data/annotation_types/data/test_raster.py index 6bc8f2bbf..209419aed 100644 --- a/libs/labelbox/tests/data/annotation_types/data/test_raster.py +++ b/libs/labelbox/tests/data/annotation_types/data/test_raster.py @@ -5,34 +5,28 @@ import pytest from PIL import Image -from labelbox.data.annotation_types.data import ImageData +from labelbox.data.annotation_types.data import GenericDataRowData, MaskData from pydantic import ValidationError def test_validate_schema(): with pytest.raises(ValidationError): - data = ImageData() + MaskData() def test_im_bytes(): data = (np.random.random((32, 32, 3)) * 255).astype(np.uint8) im_bytes = BytesIO() Image.fromarray(data).save(im_bytes, format="PNG") - raster_data = ImageData(im_bytes=im_bytes.getvalue()) + raster_data = MaskData(im_bytes=im_bytes.getvalue()) data_ = raster_data.value assert np.all(data == data_) def test_im_url(): - raster_data = ImageData(url="https://picsum.photos/id/829/200/300") - data_ = raster_data.value - assert data_.shape == (300, 200, 3) - - -def test_im_path(): - img_path = "/tmp/img.jpg" - urllib.request.urlretrieve("https://picsum.photos/id/829/200/300", img_path) - raster_data = ImageData(file_path=img_path) + raster_data = MaskData( + uid="test", url="https://picsum.photos/id/829/200/300" + ) data_ = raster_data.value assert data_.shape == (300, 200, 3) @@ -42,14 +36,11 @@ def test_ref(): uid = "uid" metadata = [] media_attributes = {} - data = ImageData( - im_bytes=b"", - external_id=external_id, + data = GenericDataRowData( uid=uid, metadata=metadata, media_attributes=media_attributes, ) - assert data.external_id == external_id assert data.uid == uid assert data.media_attributes == media_attributes assert data.metadata == metadata diff --git a/libs/labelbox/tests/data/annotation_types/data/test_text.py b/libs/labelbox/tests/data/annotation_types/data/test_text.py deleted file mode 100644 index 865f93e65..000000000 --- a/libs/labelbox/tests/data/annotation_types/data/test_text.py +++ /dev/null @@ -1,55 +0,0 @@ -import os - -import pytest - -from labelbox.data.annotation_types import TextData -from pydantic import ValidationError - - -def test_validate_schema(): - with pytest.raises(ValidationError): - data = TextData() - - -def test_text(): - text = "hello world" - metadata = [] - media_attributes = {} - text_data = TextData( - text=text, metadata=metadata, media_attributes=media_attributes - ) - assert text_data.text == text - - -def test_url(): - url = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/sample3.txt" - text_data = TextData(url=url) - text = text_data.value - assert len(text) == 3541 - - -def test_file(tmpdir): - content = "foo bar baz" - file = "hello.txt" - dir = tmpdir.mkdir("data") - dir.join(file).write(content) - text_data = TextData(file_path=os.path.join(dir.strpath, file)) - assert len(text_data.value) == len(content) - - -def test_ref(): - external_id = "external_id" - uid = "uid" - metadata = [] - media_attributes = {} - data = TextData( - text="hello world", - external_id=external_id, - uid=uid, - metadata=metadata, - media_attributes=media_attributes, - ) - assert data.external_id == external_id - assert data.uid == uid - assert data.media_attributes == media_attributes - assert data.metadata == metadata diff --git a/libs/labelbox/tests/data/annotation_types/data/test_video.py b/libs/labelbox/tests/data/annotation_types/data/test_video.py deleted file mode 100644 index 5fd77c2c8..000000000 --- a/libs/labelbox/tests/data/annotation_types/data/test_video.py +++ /dev/null @@ -1,73 +0,0 @@ -import numpy as np -import pytest - -from labelbox.data.annotation_types import VideoData -from pydantic import ValidationError - - -def test_validate_schema(): - with pytest.raises(ValidationError): - data = VideoData() - - -def test_frames(): - data = { - x: (np.random.random((32, 32, 3)) * 255).astype(np.uint8) - for x in range(5) - } - video_data = VideoData(frames=data) - for idx, frame in video_data.frame_generator(): - assert idx in data - assert np.all(frame == data[idx]) - - -def test_file_path(): - path = "tests/integration/media/cat.mp4" - raster_data = VideoData(file_path=path) - - with pytest.raises(ValueError): - raster_data[0] - - raster_data.load_frames() - raster_data[0] - - frame_indices = list(raster_data.frames.keys()) - # 29 frames - assert set(frame_indices) == set(list(range(28))) - - -def test_file_url(): - url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerMeltdowns.mp4" - raster_data = VideoData(url=url) - - with pytest.raises(ValueError): - raster_data[0] - - raster_data.load_frames() - raster_data[0] - - frame_indices = list(raster_data.frames.keys()) - # 362 frames - assert set(frame_indices) == set(list(range(361))) - - -def test_ref(): - external_id = "external_id" - uid = "uid" - data = { - x: (np.random.random((32, 32, 3)) * 255).astype(np.uint8) - for x in range(5) - } - metadata = [] - media_attributes = {} - data = VideoData( - frames=data, - external_id=external_id, - uid=uid, - metadata=metadata, - media_attributes=media_attributes, - ) - assert data.external_id == external_id - assert data.uid == uid - assert data.media_attributes == media_attributes - assert data.metadata == metadata diff --git a/libs/labelbox/tests/data/annotation_types/test_collection.py b/libs/labelbox/tests/data/annotation_types/test_collection.py index 1c9cd669e..f818b94ff 100644 --- a/libs/labelbox/tests/data/annotation_types/test_collection.py +++ b/libs/labelbox/tests/data/annotation_types/test_collection.py @@ -7,19 +7,21 @@ from labelbox.data.annotation_types import ( LabelGenerator, ObjectAnnotation, - ImageData, - MaskData, Line, Mask, Point, Label, + GenericDataRowData, + MaskData, ) from labelbox import OntologyBuilder, Tool @pytest.fixture def list_of_labels(): - return [Label(data=ImageData(url="http://someurl")) for _ in range(5)] + return [ + Label(data=GenericDataRowData(uid="http://someurl")) for _ in range(5) + ] @pytest.fixture @@ -73,7 +75,7 @@ def test_conversion(list_of_labels): def test_adding_schema_ids(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=GenericDataRowData(uid="123456"), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -91,37 +93,9 @@ def test_adding_schema_ids(): assert next(generator).annotations[0].feature_schema_id == feature_schema_id -def test_adding_urls(signer): - label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), - annotations=[], - ) - uuid = str(uuid4()) - generator = LabelGenerator([label]).add_url_to_data(signer(uuid)) - assert label.data.url != uuid - assert next(generator).data.url == uuid - assert label.data.url == uuid - - -def test_adding_to_dataset(signer): - dataset = FakeDataset() - label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), - annotations=[], - ) - uuid = str(uuid4()) - generator = LabelGenerator([label]).add_to_dataset(dataset, signer(uuid)) - assert label.data.url != uuid - generated_label = next(generator) - assert generated_label.data.url == uuid - assert generated_label.data.external_id is not None - assert generated_label.data.uid == dataset.uid - assert label.data.url == uuid - - def test_adding_to_masks(signer): label = Label( - data=ImageData(arr=np.random.random((32, 32, 3)).astype(np.uint8)), + data=GenericDataRowData(uid="12345"), annotations=[ ObjectAnnotation( name="1234", diff --git a/libs/labelbox/tests/data/annotation_types/test_label.py b/libs/labelbox/tests/data/annotation_types/test_label.py index 5bdfb6bde..8439837ed 100644 --- a/libs/labelbox/tests/data/annotation_types/test_label.py +++ b/libs/labelbox/tests/data/annotation_types/test_label.py @@ -17,7 +17,7 @@ ObjectAnnotation, Point, Line, - ImageData, + MaskData, Label, ) import pytest @@ -26,7 +26,9 @@ def test_schema_assignment_geometry(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData( + arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" + ), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -51,7 +53,7 @@ def test_schema_assignment_classification(): option_name = "my_option" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ClassificationAnnotation( value=Radio(answer=ClassificationAnswer(name=option_name)), @@ -102,7 +104,7 @@ def test_schema_assignment_subclass(): value=Radio(answer=ClassificationAnswer(name=option_name)), ) label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -167,7 +169,9 @@ def test_highly_nested(): ], ) label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData( + arr=np.ones((32, 32, 3), dtype=np.uint8), global_key="test" + ), annotations=[ ObjectAnnotation( value=Line(points=[Point(x=1, y=2), Point(x=2, y=2)]), @@ -230,7 +234,7 @@ def test_highly_nested(): def test_schema_assignment_confidence(): name = "line_feature" label = Label( - data=ImageData(arr=np.ones((32, 32, 3), dtype=np.uint8)), + data=MaskData(arr=np.ones((32, 32, 3), dtype=np.uint8), uid="test"), annotations=[ ObjectAnnotation( value=Line( @@ -252,10 +256,10 @@ def test_initialize_label_no_coercion(): value=lb_types.ConversationEntity(start=0, end=8, message_id="4"), ) label = Label( - data=lb_types.ConversationData(global_key=global_key), + data=lb_types.GenericDataRowData(global_key=global_key), annotations=[ner_annotation], ) - assert isinstance(label.data, lb_types.ConversationData) + assert isinstance(label.data, lb_types.GenericDataRowData) assert label.data.global_key == global_key diff --git a/libs/labelbox/tests/data/annotation_types/test_metrics.py b/libs/labelbox/tests/data/annotation_types/test_metrics.py index 94c9521a5..4e9355573 100644 --- a/libs/labelbox/tests/data/annotation_types/test_metrics.py +++ b/libs/labelbox/tests/data/annotation_types/test_metrics.py @@ -8,7 +8,11 @@ ConfusionMatrixMetric, ScalarMetric, ) -from labelbox.data.annotation_types import ScalarMetric, Label, ImageData +from labelbox.data.annotation_types import ( + ScalarMetric, + Label, + GenericDataRowData, +) from labelbox.data.annotation_types.metrics.scalar import RESERVED_METRIC_NAMES from pydantic import ValidationError @@ -19,7 +23,8 @@ def test_legacy_scalar_metric(): assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { @@ -72,7 +77,8 @@ def test_custom_scalar_metric(feature_name, subclass_name, aggregation, value): assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { @@ -134,7 +140,8 @@ def test_custom_confusison_matrix_metric( assert metric.value == value label = Label( - data=ImageData(uid="ckrmd9q8g000009mg6vej7hzg"), annotations=[metric] + data=GenericDataRowData(uid="ckrmd9q8g000009mg6vej7hzg"), + annotations=[metric], ) expected = { "data": { diff --git a/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py b/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py index 115194a58..28ef6e0cf 100644 --- a/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py +++ b/libs/labelbox/tests/data/export/streamable/test_export_video_streamable.py @@ -4,7 +4,7 @@ import labelbox as lb import labelbox.types as lb_types -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.schema.annotation_import import AnnotationImportState from labelbox.schema.export_task import ExportTask, StreamType @@ -41,7 +41,7 @@ def test_export( for data_row_uid in data_row_uids: labels = [ lb_types.Label( - data=VideoData(uid=data_row_uid), + data=GenericDataRowData(uid=data_row_uid), annotations=bbox_video_annotation_objects, ) ] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py index 59f568c75..fb78916f4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_checklist.py @@ -4,7 +4,7 @@ ClassificationAnswer, Radio, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -13,9 +13,8 @@ def test_serialization_min(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -41,9 +40,8 @@ def test_serialization_min(): def test_serialization_with_classification(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -131,9 +129,8 @@ def test_serialization_with_classification(): def test_serialization_with_classification_double_nested(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -224,9 +221,8 @@ def test_serialization_with_classification_double_nested(): def test_serialization_with_classification_double_nested_2(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py index 561f9ce86..5aa7285e2 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_conversation.py @@ -19,7 +19,7 @@ radio_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="radio", @@ -48,7 +48,7 @@ checklist_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="checklist", @@ -78,7 +78,7 @@ ] free_text_label = [ lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", @@ -164,7 +164,7 @@ def test_conversation_entity_import_without_confidence(): def test_benchmark_reference_label_flag_enabled(): label = lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", @@ -181,7 +181,7 @@ def test_benchmark_reference_label_flag_enabled(): def test_benchmark_reference_label_flag_disabled(): label = lb_types.Label( - data=lb_types.ConversationData(global_key="my_global_key"), + data=lb_types.GenericDataRowData(global_key="my_global_key"), annotations=[ lb_types.ClassificationAnnotation( name="free_text", diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py index 762891aa2..6a00fa871 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_dicom.py @@ -31,7 +31,7 @@ ] polyline_label = lb_types.Label( - data=lb_types.DicomData(uid="test-uid"), + data=lb_types.GenericDataRowData(uid="test-uid"), annotations=dicom_polyline_annotations, ) @@ -58,7 +58,7 @@ } polyline_with_global_key = lb_types.Label( - data=lb_types.DicomData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=dicom_polyline_annotations, ) @@ -109,11 +109,12 @@ } video_mask_label = lb_types.Label( - data=lb_types.VideoData(uid="test-uid"), annotations=[video_mask_annotation] + data=lb_types.GenericDataRowData(uid="test-uid"), + annotations=[video_mask_annotation], ) video_mask_label_with_global_key = lb_types.Label( - data=lb_types.VideoData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[video_mask_annotation], ) """ @@ -128,11 +129,12 @@ ) dicom_mask_label = lb_types.Label( - data=lb_types.DicomData(uid="test-uid"), annotations=[dicom_mask_annotation] + data=lb_types.GenericDataRowData(uid="test-uid"), + annotations=[dicom_mask_annotation], ) dicom_mask_label_with_global_key = lb_types.Label( - data=lb_types.DicomData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[dicom_mask_annotation], ) diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_document.py b/libs/labelbox/tests/data/serialization/ndjson/test_document.py index a0897ad9f..fcdf4368b 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_document.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_document.py @@ -26,7 +26,7 @@ ) bbox_labels = [ lb_types.Label( - data=lb_types.DocumentData(global_key="test-global-key"), + data=lb_types.GenericDataRowData(global_key="test-global-key"), annotations=[bbox_annotation], ) ] diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py index 1ab678cde..a0cd13e81 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_export_video_objects.py @@ -1,13 +1,13 @@ from labelbox.data.annotation_types import Label, VideoObjectAnnotation from labelbox.data.serialization.ndjson.converter import NDJsonConverter from labelbox.data.annotation_types.geometry import Rectangle, Point -from labelbox.data.annotation_types import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData def video_bbox_label(): return Label( uid="cl1z52xwh00050fhcmfgczqvn", - data=VideoData( + data=GenericDataRowData( uid="cklr9mr4m5iao0rb6cvxu4qbn", url="https://storage.labelbox.com/ckcz6bubudyfi0855o1dt1g9s%2F26403a22-604a-a38c-eeff-c2ed481fb40a-cat.mp4?Expires=1651677421050&KeyName=labelbox-assets-key-3&Signature=vF7gMyfHzgZdfbB8BHgd88Ws-Ms", ), diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py index 349be13a8..7b03a8447 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_free_text.py @@ -5,7 +5,7 @@ Radio, Text, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -14,7 +14,7 @@ def test_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", text="This is a test", ), @@ -38,7 +38,7 @@ def test_serialization(): def test_nested_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", text="This is a test", ), diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_image.py b/libs/labelbox/tests/data/serialization/ndjson/test_image.py index d67acb9c3..4d615658c 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_image.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_image.py @@ -11,7 +11,6 @@ Mask, Label, ObjectAnnotation, - ImageData, MaskData, ) from labelbox.types import Rectangle, Polygon, Point @@ -262,7 +261,7 @@ def test_mask_from_arr(): ), ) ], - data=ImageData(uid="0" * 25), + data=GenericDataRowData(uid="0" * 25), ) res = next(NDJsonConverter.serialize([label])) res.pop("uuid") diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py index 4458e335c..ec57f0528 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_radio.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_radio.py @@ -3,7 +3,7 @@ ClassificationAnswer, ) from labelbox.data.annotation_types.classification.classification import Radio -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -12,9 +12,8 @@ def test_serialization_with_radio_min(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( @@ -43,9 +42,8 @@ def test_serialization_with_radio_min(): def test_serialization_with_radio_classification(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_text.py b/libs/labelbox/tests/data/serialization/ndjson/test_text.py index 21db389cb..28eba07bd 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_text.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_text.py @@ -2,7 +2,7 @@ from labelbox.data.annotation_types.classification.classification import ( Text, ) -from labelbox.data.annotation_types.data.text import TextData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.label import Label from labelbox.data.serialization.ndjson.converter import NDJsonConverter @@ -11,9 +11,8 @@ def test_serialization(): label = Label( uid="ckj7z2q0b0000jx6x0q2q7q0d", - data=TextData( + data=GenericDataRowData( uid="bkj7z2q0b0000jx6x0q2q7q0d", - text="This is a test", ), annotations=[ ClassificationAnnotation( diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_video.py b/libs/labelbox/tests/data/serialization/ndjson/test_video.py index 4fba5c2ca..6c14343a4 100644 --- a/libs/labelbox/tests/data/serialization/ndjson/test_video.py +++ b/libs/labelbox/tests/data/serialization/ndjson/test_video.py @@ -6,11 +6,10 @@ Radio, Text, ) -from labelbox.data.annotation_types.data.video import VideoData +from labelbox.data.annotation_types.data import GenericDataRowData from labelbox.data.annotation_types.geometry.line import Line from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.geometry.rectangle import Rectangle -from labelbox.data.annotation_types.geometry.point import Point from labelbox.data.annotation_types.label import Label from labelbox.data.annotation_types.video import ( @@ -28,7 +27,7 @@ def test_video(): labels = [ Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + data=GenericDataRowData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), annotations=[ VideoClassificationAnnotation( feature_schema_id="ckrb1sfjx099a0y914hl319ie", @@ -304,7 +303,7 @@ def test_video_name_only(): data = json.load(file) labels = [ Label( - data=VideoData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), + data=GenericDataRowData(uid="ckrb1sf1i1g7i0ybcdc6oc8ct"), annotations=[ VideoClassificationAnnotation( name="question 1", @@ -574,7 +573,7 @@ def test_video_name_only(): def test_video_classification_global_subclassifications(): label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=[ @@ -790,7 +789,7 @@ def test_video_classification_nesting_bbox(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, @@ -940,7 +939,7 @@ def test_video_classification_point(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, @@ -1108,7 +1107,7 @@ def test_video_classification_frameline(): ] label = Label( - data=VideoData( + data=GenericDataRowData( global_key="sample-video-4.mp4", ), annotations=bbox_annotation, diff --git a/libs/labelbox/tests/unit/test_label_data_type.py b/libs/labelbox/tests/unit/test_label_data_type.py index 7bc32e37c..611324f78 100644 --- a/libs/labelbox/tests/unit/test_label_data_type.py +++ b/libs/labelbox/tests/unit/test_label_data_type.py @@ -1,11 +1,7 @@ -from email import message import pytest -from pydantic import ValidationError - from labelbox.data.annotation_types.data.generic_data_row_data import ( GenericDataRowData, ) -from labelbox.data.annotation_types.data.video import VideoData from labelbox.data.annotation_types.label import Label @@ -37,20 +33,6 @@ def test_generic_data_type_validations(): Label(data=data) -def test_video_data_type(): - data = { - "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr", - } - with pytest.warns(UserWarning, match="Use a dict"): - label = Label(data=VideoData(**data)) - data = label.data - assert isinstance(data, VideoData) - assert ( - data.global_key - == "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr" - ) - - def test_generic_data_row(): data = { "global_key": "https://lb-test-data.s3.us-west-1.amazonaws.com/image-samples/sample-image-1.jpg-BEidMVWRmyXjVCnr",