diff --git a/docs/conf.py b/docs/conf.py
index 51648857e..07656e3a0 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -16,7 +16,7 @@
 project = 'Python SDK reference'
 copyright = '2024, Labelbox'
 author = 'Labelbox'
-release = '5.1.0'
+release = '5.2.1'
 
 # -- General configuration ---------------------------------------------------
 
diff --git a/docs/labelbox/datarow_payload_templates.rst b/docs/labelbox/datarow_payload_templates.rst
new file mode 100644
index 000000000..34dac6111
--- /dev/null
+++ b/docs/labelbox/datarow_payload_templates.rst
@@ -0,0 +1,6 @@
+Datarow payload templates
+===============================================================================================
+
+.. automodule:: labelbox.schema.data_row_payload_templates
+   :members:
+   :show-inheritance:
\ No newline at end of file
diff --git a/libs/labelbox/CHANGELOG.md b/libs/labelbox/CHANGELOG.md
index 6b23cf6bc..c6a21580c 100644
--- a/libs/labelbox/CHANGELOG.md
+++ b/libs/labelbox/CHANGELOG.md
@@ -1,6 +1,14 @@
 # Changelog
 
-# Version 5.1.0 (2024-09-27)
+# Version 5.2.1 (2024-10-09)
 ## Fixed
+* Exporter now decodes streamed export file content as UTF-8
+
+# Version 5.2.0 (2024-10-09)
+## Added
+* Support data row / batch for live MMC projects ([#1856](https://github.com/Labelbox/labelbox-python/pull/1856))
+
+# Version 5.1.0 (2024-09-27)
+## Added
 * Support self-signed SSL certs([#1811](https://github.com/Labelbox/labelbox-python/pull/1811))
 * Rectangle units now correctly support percent inputs([#1848](https://github.com/Labelbox/labelbox-python/pull/1848))
diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml
index fa64c74f6..f58dba890 100644
--- a/libs/labelbox/pyproject.toml
+++ b/libs/labelbox/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "labelbox"
-version = "5.1.0"
+version = "5.2.1"
 description = "Labelbox Python API"
 authors = [{ name = "Labelbox", email = "engineering@labelbox.com" }]
 dependencies = [
@@ -15,7 +15,7 @@ dependencies = [
     "lbox-clients==1.1.0",
 ]
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.9,<3.13"
 classifiers = [
     # How mature is this project?
     "Development Status :: 5 - Production/Stable",
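
The release bump above also caps `requires-python` at `<3.13`. A minimal sketch, assuming you gate upgrades from a Python script, of checking the new constraint before installing 5.2.1:

```python
# Minimal sketch: confirm the interpreter satisfies the new
# requires-python range (>=3.9,<3.13) declared in pyproject.toml.
import sys

if not ((3, 9) <= sys.version_info[:2] < (3, 13)):
    raise RuntimeError("labelbox 5.2.1 requires Python >=3.9,<3.13")
```
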
"Development Status :: 5 - Production/Stable", diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py index 850aec0be..ce02a1b44 100644 --- a/libs/labelbox/src/labelbox/__init__.py +++ b/libs/labelbox/src/labelbox/__init__.py @@ -1,45 +1,58 @@ name = "labelbox" -__version__ = "5.1.0" +__version__ = "5.2.1" from labelbox.client import Client -from labelbox.schema.project import Project -from labelbox.schema.model import Model -from labelbox.schema.model_config import ModelConfig from labelbox.schema.annotation_import import ( + LabelImport, MALPredictionImport, MEAPredictionImport, - LabelImport, MEAToMALPredictionImport, ) -from labelbox.schema.dataset import Dataset -from labelbox.schema.data_row import DataRow +from labelbox.schema.asset_attachment import AssetAttachment +from labelbox.schema.batch import Batch +from labelbox.schema.benchmark import Benchmark from labelbox.schema.catalog import Catalog +from labelbox.schema.data_row import DataRow +from labelbox.schema.data_row_metadata import ( + DataRowMetadata, + DataRowMetadataField, + DataRowMetadataOntology, + DeleteDataRowMetadata, +) +from labelbox.schema.dataset import Dataset from labelbox.schema.enums import AnnotationImportState -from labelbox.schema.label import Label -from labelbox.schema.batch import Batch -from labelbox.schema.review import Review -from labelbox.schema.user import User -from labelbox.schema.organization import Organization -from labelbox.schema.task import Task from labelbox.schema.export_task import ( - StreamType, + BufferedJsonConverterOutput, ExportTask, BufferedJsonConverterOutput, ) +from labelbox.schema.iam_integration import IAMIntegration +from labelbox.schema.identifiable import GlobalKey, UniqueId +from labelbox.schema.identifiables import DataRowIds, GlobalKeys, UniqueIds +from labelbox.schema.invite import Invite, InviteLimit +from labelbox.schema.label import Label +from labelbox.schema.label_score import LabelScore from labelbox.schema.labeling_frontend import ( LabelingFrontend, LabelingFrontendOptions, ) -from labelbox.schema.asset_attachment import AssetAttachment -from labelbox.schema.webhook import Webhook +from labelbox.schema.labeling_service import LabelingService +from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard +from labelbox.schema.labeling_service_status import LabelingServiceStatus +from labelbox.schema.media_type import MediaType +from labelbox.schema.model import Model +from labelbox.schema.model_config import ModelConfig +from labelbox.schema.model_run import DataSplit, ModelRun from labelbox.schema.ontology import ( + Classification, + FeatureSchema, Ontology, OntologyBuilder, - Classification, Option, + PromptResponseClassification, + ResponseOption, Tool, - FeatureSchema, ) from labelbox.schema.ontology import PromptResponseClassification from labelbox.schema.ontology import ResponseOption @@ -64,10 +77,19 @@ from labelbox.schema.identifiables import UniqueIds, GlobalKeys, DataRowIds from labelbox.schema.identifiable import UniqueId, GlobalKey from labelbox.schema.ontology_kind import OntologyKind +from labelbox.schema.organization import Organization +from labelbox.schema.project import Project +from labelbox.schema.project_model_config import ProjectModelConfig from labelbox.schema.project_overview import ( ProjectOverview, ProjectOverviewDetailed, ) -from labelbox.schema.labeling_service import LabelingService -from labelbox.schema.labeling_service_dashboard import LabelingServiceDashboard -from 
diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py
index aa08ab0b3..bcf29665e 100644
--- a/libs/labelbox/src/labelbox/client.py
+++ b/libs/labelbox/src/labelbox/client.py
@@ -6,6 +6,7 @@
 import random
 import time
 import urllib.parse
+import warnings
 from collections import defaultdict
 from datetime import datetime, timezone
 from types import MappingProxyType
@@ -584,7 +585,7 @@ def create_dataset(
             )
 
             if not validation_result["validateDataset"]["valid"]:
-                raise LabelboxError(
+                raise labelbox.exceptions.LabelboxError(
                     "IAMIntegration was not successfully added to the dataset."
                 )
         except Exception as e:
@@ -649,7 +650,7 @@ def create_model_evaluation_project(
         is_consensus_enabled: Optional[bool] = None,
         dataset_id: Optional[str] = None,
         dataset_name: Optional[str] = None,
-        data_row_count: int = 100,
+        data_row_count: Optional[int] = None,
     ) -> Project:
         """
         Use this method exclusively to create a chat model evaluation project.
@@ -674,19 +675,20 @@
             >>> client.create_model_evaluation_project(name=project_name, dataset_id="clr00u8j0j0j0", data_row_count=10)
             >>>     This creates a new project, and adds 100 datarows to the dataset with id "clr00u8j0j0j0" and assigns a batch of the newly created 10 data rows to the project.
 
+            >>> client.create_model_evaluation_project(name=project_name)
+            >>>     This creates a new project with no data rows.
+
         """
-        if not dataset_id and not dataset_name:
-            raise ValueError(
-                "dataset_name or data_set_id must be present and not be an empty string."
-            )
+        dataset_name_or_id = dataset_id or dataset_name
+        append_to_existing_dataset = bool(dataset_id)
 
-        if dataset_id:
-            append_to_existing_dataset = True
-            dataset_name_or_id = dataset_id
-        else:
-            append_to_existing_dataset = False
-            dataset_name_or_id = dataset_name
+        if dataset_name_or_id:
+            if data_row_count is None:
+                data_row_count = 100
+            warnings.warn(
+                "Automatic generation of data rows for live model evaluation projects is deprecated. The dataset_id, dataset_name, and data_row_count arguments will be removed in a future version.",
+                DeprecationWarning,
+            )
 
         media_type = MediaType.Conversational
         editor_task_type = EditorTaskType.ModelChatEvaluation
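
With `data_row_count` defaulting to `None`, the recommended call site no longer mentions a dataset. A sketch of both paths; the second still works but now emits the `DeprecationWarning` added above:

```python
# New flow: create a live model evaluation project with no data rows,
# then attach data rows with create_batch (see project.py below).
project = client.create_model_evaluation_project(name="live-mmc-demo")

# Legacy flow: dataset arguments still work but are deprecated.
legacy = client.create_model_evaluation_project(
    name="live-mmc-legacy",
    dataset_name="legacy-dataset",  # triggers the DeprecationWarning
    data_row_count=10,
)
```
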
diff --git a/libs/labelbox/src/labelbox/data/annotation_types/collection.py b/libs/labelbox/src/labelbox/data/annotation_types/collection.py
index 51bcce1b2..42d2a1184 100644
--- a/libs/labelbox/src/labelbox/data/annotation_types/collection.py
+++ b/libs/labelbox/src/labelbox/data/annotation_types/collection.py
@@ -1,14 +1,10 @@
 import logging
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Callable, Generator, Iterable, Union, Optional
-from uuid import uuid4
 import warnings
+from typing import Callable, Generator, Iterable, Union
 
-from tqdm import tqdm
-
-from labelbox.schema import ontology
 from labelbox.orm.model import Entity
-from ..ontology import get_classifications, get_tools
+from labelbox.schema import ontology
+
 from ..generator import PrefetchGenerator
 from .label import Label
diff --git a/libs/labelbox/src/labelbox/project_validation.py b/libs/labelbox/src/labelbox/project_validation.py
index 41f1fa762..2a6db9e2a 100644
--- a/libs/labelbox/src/labelbox/project_validation.py
+++ b/libs/labelbox/src/labelbox/project_validation.py
@@ -69,6 +69,9 @@ def validate_fields(self):
                 is_consensus_enabled=True,
             )
 
+        if self.data_row_count is not None and self.data_row_count < 0:
+            raise ValueError("data_row_count must be a non-negative integer.")
+
         return self
 
     def _set_quality_mode_attributes(
diff --git a/libs/labelbox/src/labelbox/schema/catalog.py b/libs/labelbox/src/labelbox/schema/catalog.py
index df50503ad..8d9646779 100644
--- a/libs/labelbox/src/labelbox/schema/catalog.py
+++ b/libs/labelbox/src/labelbox/schema/catalog.py
@@ -48,7 +48,7 @@ def export_v2(
         """
 
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
        )
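
(The validation error message above is aligned with the check: the guard rejects only negative values, so zero remains valid.) The reworded warning points at the task-based export API. A hedged sketch of that replacement flow, assuming `Project.export` matches the linked export-overview docs:

```python
# Replacement for export_v2: a task-based export.
export_task = project.export(params={"data_row_details": True})
export_task.wait_till_done()
```
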
diff --git a/libs/labelbox/src/labelbox/schema/data_row.py b/libs/labelbox/src/labelbox/schema/data_row.py
index ed1184c12..cb0e99b22 100644
--- a/libs/labelbox/src/labelbox/schema/data_row.py
+++ b/libs/labelbox/src/labelbox/schema/data_row.py
@@ -280,7 +280,7 @@ def export_v2(
         """
 
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
         )
diff --git a/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py
new file mode 100644
index 000000000..2e2728daa
--- /dev/null
+++ b/libs/labelbox/src/labelbox/schema/data_row_payload_templates.py
@@ -0,0 +1,41 @@
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel, Field
+
+from labelbox.schema.data_row import DataRowMetadataField
+
+
+class ModelEvalutationTemplateRowData(BaseModel):
+    type: str = Field(
+        default="application/vnd.labelbox.conversational.model-chat-evaluation",
+        frozen=True,
+    )
+    draft: bool = Field(default=True, frozen=True)
+    rootMessageIds: List[str] = Field(default=[])
+    actors: Dict = Field(default={})
+    version: int = Field(default=2, frozen=True)
+    messages: Dict = Field(default={})
+    global_key: Optional[str] = None
+
+
+class ModelEvaluationTemplate(BaseModel):
+    """
+    Use this class to create a model evaluation data row.
+
+    Examples:
+        >>> data = ModelEvaluationTemplate()
+        >>> data.row_data.rootMessageIds = ["root1"]
+        >>> vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)]
+        >>> data.embeddings = [...]
+        >>> data.metadata_fields = [...]
+        >>> data.attachments = [...]
+        >>> content = data.model_dump()
+        >>> task = dataset.create_data_rows([content])
+    """
+
+    row_data: ModelEvalutationTemplateRowData = Field(
+        default=ModelEvalutationTemplateRowData()
+    )
+    attachments: List[Dict] = Field(default=[])
+    embeddings: List[Dict] = Field(default=[])
+    metadata_fields: List[DataRowMetadataField] = Field(default=[])
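
A usage sketch for the new template, condensed from the class docstring and the `test_mmc_data_rows.py` test added later in this diff; `dataset` is an existing `labelbox.Dataset`:

```python
from labelbox.schema.data_row_payload_templates import ModelEvaluationTemplate

data = ModelEvaluationTemplate()
data.row_data.rootMessageIds = ["root1"]
data.attachments = [{"type": "RAW_TEXT", "value": "attachment value"}]

# Serialize the template and create the data row from it.
task = dataset.create_data_rows([data.model_dump(exclude_none=True)])
task.wait_till_done()
```
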
diff --git a/libs/labelbox/src/labelbox/schema/dataset.py b/libs/labelbox/src/labelbox/schema/dataset.py
index df467caf1..107f3f50b 100644
--- a/libs/labelbox/src/labelbox/schema/dataset.py
+++ b/libs/labelbox/src/labelbox/schema/dataset.py
@@ -360,7 +360,7 @@ def export_v2(
         """
 
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
         )
diff --git a/libs/labelbox/src/labelbox/schema/export_task.py b/libs/labelbox/src/labelbox/schema/export_task.py
index b2e1d054f..7e78fc3e9 100644
--- a/libs/labelbox/src/labelbox/schema/export_task.py
+++ b/libs/labelbox/src/labelbox/schema/export_task.py
@@ -1,9 +1,14 @@
+import json
+import os
+import tempfile
+import warnings
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from enum import Enum
 from functools import lru_cache
-import json
 from typing import (
+    TYPE_CHECKING,
+    Any,
     Callable,
     Generic,
     Iterator,
@@ -11,17 +16,13 @@
     Tuple,
     TypeVar,
     Union,
-    TYPE_CHECKING,
-    Any,
 )
 
 import requests
-import tempfile
-import os
+from pydantic import BaseModel
 
 from labelbox.schema.task import Task
 from labelbox.utils import _CamelCaseMixin
-from pydantic import BaseModel
 
 if TYPE_CHECKING:
     from labelbox import Client
@@ -120,6 +121,7 @@ def _get_file_content(
         )
         response = requests.get(file_info.file, timeout=30)
         response.raise_for_status()
+        response.encoding = "utf-8"
         assert (
             len(response.content)
             == file_info.offsets.end - file_info.offsets.start + 1
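
The one-line `response.encoding = "utf-8"` change is the fix the 5.2.1 changelog refers to: without it, `requests` can guess a legacy encoding for streamed export files. A sketch of consuming a streamed export, assuming `get_buffered_stream` behaves as in the export docs:

```python
export_task = project.export()
export_task.wait_till_done()

# Each output is a BufferedJsonConverterOutput; .json is the decoded row.
for output in export_task.get_buffered_stream():
    print(output.json["data_row"]["id"])
```
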
diff --git a/libs/labelbox/src/labelbox/schema/model_run.py b/libs/labelbox/src/labelbox/schema/model_run.py
index 053c43b97..dcdbdf0e8 100644
--- a/libs/labelbox/src/labelbox/schema/model_run.py
+++ b/libs/labelbox/src/labelbox/schema/model_run.py
@@ -541,7 +541,7 @@ def export_v2(
         """
 
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
         )
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py
index ae97089a7..a6f2dfe28 100644
--- a/libs/labelbox/src/labelbox/schema/project.py
+++ b/libs/labelbox/src/labelbox/schema/project.py
@@ -417,7 +417,7 @@ def export_v2(
         """
 
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
         )
@@ -739,7 +739,9 @@ def create_batch(
             lbox.exceptions.ValueError if a project is not batch mode, if the project is auto data generation, if the batch exceeds 100k data rows
         """
 
-        if self.is_auto_data_generation():
+        if (
+            self.is_auto_data_generation() and not self.is_chat_evaluation()
+        ):  # NOTE live chat evaluation projects in the SDK do not pre-generate data rows; they use batches like all other projects
             raise ValueError(
                 "Cannot create batches for auto data generation projects"
             )
diff --git a/libs/labelbox/src/labelbox/schema/slice.py b/libs/labelbox/src/labelbox/schema/slice.py
index 3bad8cf07..a640ebc1d 100644
--- a/libs/labelbox/src/labelbox/schema/slice.py
+++ b/libs/labelbox/src/labelbox/schema/slice.py
@@ -129,7 +129,7 @@ def export_v2(
         """
 
         warnings.warn(
-            "You are currently utilizing export_v2 for this action, which will be deprecated in a V7. Please refer to docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
+            "You are currently utilizing export_v2 for this action, which will be removed in 7.0. Please refer to our docs for export alternatives. https://docs.labelbox.com/reference/export-overview#export-methods",
             DeprecationWarning,
             stacklevel=2,
         )
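
The `create_batch` change is the behavioral core of 5.2.0: live chat evaluation projects still count as auto data generation, but they are now exempt from the batch restriction. A sketch of the newly permitted call; the batch name and data row are illustrative:

```python
batch = project.create_batch(
    "mmc-batch-1",    # hypothetical batch name
    [data_row.uid],   # ids of conversational data rows to attach
)
assert batch
```
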
diff --git a/libs/labelbox/tests/conftest.py b/libs/labelbox/tests/conftest.py
index a07d52c4d..3e9f0b491 100644
--- a/libs/labelbox/tests/conftest.py
+++ b/libs/labelbox/tests/conftest.py
@@ -19,6 +19,7 @@
     Client,
     DataRow,
     Dataset,
+    LabelingFrontend,
     MediaType,
     OntologyBuilder,
     Option,
diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py
index 2342a759a..001b96771 100644
--- a/libs/labelbox/tests/data/annotation_import/conftest.py
+++ b/libs/labelbox/tests/data/annotation_import/conftest.py
@@ -2399,6 +2399,10 @@ def expected_export_v2_document():
                 "height": 65.0,
                 "width": 12.0,
             },
+            "page_dimensions": {
+                "height": 792.0,
+                "width": 612.0,
+            },
         },
     ],
     "classifications": [
diff --git a/libs/labelbox/tests/data/export/conftest.py b/libs/labelbox/tests/data/export/conftest.py
index b1b81230e..4a59b6966 100644
--- a/libs/labelbox/tests/data/export/conftest.py
+++ b/libs/labelbox/tests/data/export/conftest.py
@@ -1,8 +1,9 @@
 import time
-from labelbox import MediaType, Client
 import uuid
+
 import pytest
+from labelbox import Client, MediaType
 from labelbox.schema.annotation_import import AnnotationImportState, LabelImport
 from labelbox.schema.labeling_frontend import LabelingFrontend
 from labelbox.schema.media_type import MediaType
diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py
index 88670811e..c01cdd28e 100644
--- a/libs/labelbox/tests/integration/conftest.py
+++ b/libs/labelbox/tests/integration/conftest.py
@@ -5,13 +5,13 @@
 import time
 import uuid
 from collections import defaultdict
+from datetime import datetime, timezone
 from enum import Enum
 from itertools import islice
 from types import SimpleNamespace
 from typing import List, Tuple, Type
 
 import pytest
-import requests
 
 from labelbox import (
     Classification,
@@ -30,6 +30,7 @@
 from labelbox.pagination import PaginatedCollection
 from labelbox.schema.annotation_import import LabelImport
 from labelbox.schema.catalog import Catalog
+from labelbox.schema.data_row import DataRowMetadataField
 from labelbox.schema.enums import AnnotationImportState
 from labelbox.schema.invite import Invite
 from labelbox.schema.ontology_kind import OntologyKind
@@ -37,6 +38,30 @@
 from labelbox.schema.user import User
 
 
+@pytest.fixture
+def constants():
+    SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
+    TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt"
+    TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
+    CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb"
+    EXPECTED_METADATA_SCHEMA_IDS = [
+        SPLIT_SCHEMA_ID,
+        TEST_SPLIT_ID,
+        TEXT_SCHEMA_ID,
+        CAPTURE_DT_SCHEMA_ID,
+    ]
+    CUSTOM_TEXT_SCHEMA_NAME = "custom_text"
+
+    return {
+        "SPLIT_SCHEMA_ID": SPLIT_SCHEMA_ID,
+        "TEST_SPLIT_ID": TEST_SPLIT_ID,
+        "TEXT_SCHEMA_ID": TEXT_SCHEMA_ID,
+        "CAPTURE_DT_SCHEMA_ID": CAPTURE_DT_SCHEMA_ID,
+        "EXPECTED_METADATA_SCHEMA_IDS": EXPECTED_METADATA_SCHEMA_IDS,
+        "CUSTOM_TEXT_SCHEMA_NAME": CUSTOM_TEXT_SCHEMA_NAME,
+    }
+
+
 @pytest.fixture
 def project_based_user(client, rand_gen):
     email = rand_gen(str)
@@ -632,11 +657,28 @@ def chat_evaluation_ontology(client, rand_gen):
 
 
 @pytest.fixture
-def live_chat_evaluation_project_with_new_dataset(client, rand_gen):
+def live_chat_evaluation_project(client, rand_gen):
     project_name = f"test-model-evaluation-project-{rand_gen(str)}"
-    dataset_name = f"test-model-evaluation-dataset-{rand_gen(str)}"
-    project = client.create_model_evaluation_project(
-        name=project_name, dataset_name=dataset_name, data_row_count=1
-    )
+    project = client.create_model_evaluation_project(name=project_name)
+
+    yield project
+
+    project.delete()
+
+
+@pytest.fixture
+def live_chat_evaluation_project_with_batch(
+    client,
+    rand_gen,
+    live_chat_evaluation_project,
+    offline_conversational_data_row,
+):
+    project_name = f"test-model-evaluation-project-{rand_gen(str)}"
+    project = client.create_model_evaluation_project(name=project_name)
+
+    project.create_batch(
+        rand_gen(str),
+        [offline_conversational_data_row.uid],  # sample of data row objects
+    )
 
     yield project
@@ -832,3 +874,21 @@ def print_perf_summary():
         for aaa in islice(sorted_dict, num_of_entries)
     ]
     print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr)
+
+
+@pytest.fixture
+def make_metadata_fields(constants):
+    msg = "A message"
+    time = datetime.now(timezone.utc)
+
+    fields = [
+        DataRowMetadataField(
+            schema_id=constants["SPLIT_SCHEMA_ID"],
+            value=constants["TEST_SPLIT_ID"],
+        ),
+        DataRowMetadataField(
+            schema_id=constants["CAPTURE_DT_SCHEMA_ID"], value=time
+        ),
+        DataRowMetadataField(schema_id=constants["TEXT_SCHEMA_ID"], value=msg),
+    ]
+    return fields
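
The module-level schema-id constants from `test_data_rows.py` move into shared `constants` and `make_metadata_fields` fixtures so other modules, such as the new `test_mmc_data_rows.py`, can reuse them. A sketch of a test consuming both:

```python
def test_uses_shared_fixtures(constants, make_metadata_fields):
    assert constants["CUSTOM_TEXT_SCHEMA_NAME"] == "custom_text"
    assert len(make_metadata_fields) == 3  # split, capture datetime, text
```
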
diff --git a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py
index 3e462d677..796cd9859 100644
--- a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py
+++ b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py
@@ -7,9 +7,8 @@
 def test_create_chat_evaluation_ontology_project(
-    client,
     chat_evaluation_ontology,
-    live_chat_evaluation_project_with_new_dataset,
+    live_chat_evaluation_project,
     offline_conversational_data_row,
     rand_gen,
 ):
@@ -28,7 +27,7 @@
     assert classification.schema_id
     assert classification.feature_schema_id
 
-    project = live_chat_evaluation_project_with_new_dataset
+    project = live_chat_evaluation_project
     assert project.model_setup_complete is None
 
     project.connect_ontology(ontology)
@@ -36,28 +35,11 @@
     assert project.labeling_frontend().name == "Editor"
     assert project.ontology().name == ontology.name
 
-    with pytest.raises(
-        ValueError,
-        match="Cannot create batches for auto data generation projects",
-    ):
-        project.create_batch(
-            rand_gen(str),
-            [offline_conversational_data_row.uid],  # sample of data row objects
-        )
-
-    with pytest.raises(
-        ValueError,
-        match="Cannot create batches for auto data generation projects",
-    ):
-        with patch(
-            "labelbox.schema.project.MAX_SYNC_BATCH_ROW_COUNT", new=0
-        ):  # force to async
-            project.create_batch(
-                rand_gen(str),
-                [
-                    offline_conversational_data_row.uid
-                ],  # sample of data row objects
-            )
+    batch = project.create_batch(
+        rand_gen(str),
+        [offline_conversational_data_row.uid],  # sample of data row objects
+    )
+    assert batch
 
 
 def test_create_chat_evaluation_ontology_project_existing_dataset(
diff --git a/libs/labelbox/tests/integration/test_data_rows.py b/libs/labelbox/tests/integration/test_data_rows.py
index 481385e75..485719575 100644
--- a/libs/labelbox/tests/integration/test_data_rows.py
+++ b/libs/labelbox/tests/integration/test_data_rows.py
@@ -21,24 +21,17 @@
 from labelbox.schema.media_type import MediaType
 from labelbox.schema.task import Task
 
-SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
-TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt"
-TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
-CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb"
-EXPECTED_METADATA_SCHEMA_IDS = [
-    SPLIT_SCHEMA_ID,
-    TEST_SPLIT_ID,
-    TEXT_SCHEMA_ID,
-    CAPTURE_DT_SCHEMA_ID,
-].sort()
-CUSTOM_TEXT_SCHEMA_NAME = "custom_text"
-
 
 @pytest.fixture
-def mdo(client):
+def mdo(
+    client,
+    constants,
+):
     mdo = client.get_data_row_metadata_ontology()
     try:
-        mdo.create_schema(CUSTOM_TEXT_SCHEMA_NAME, DataRowMetadataKind.string)
+        mdo.create_schema(
+            constants["CUSTOM_TEXT_SCHEMA_NAME"], DataRowMetadataKind.string
+        )
     except MalformedQueryException:
         # Do nothing if already exists
         pass
@@ -93,26 +86,18 @@ def tile_content():
     }
 
 
-def make_metadata_fields():
-    msg = "A message"
-    time = datetime.now(timezone.utc)
-
-    fields = [
-        DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID, value=TEST_SPLIT_ID),
-        DataRowMetadataField(schema_id=CAPTURE_DT_SCHEMA_ID, value=time),
-        DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value=msg),
-    ]
-    return fields
-
-
-def make_metadata_fields_dict():
+@pytest.fixture
+def make_metadata_fields_dict(constants):
     msg = "A message"
     time = datetime.now(timezone.utc)
 
     fields = [
-        {"schema_id": SPLIT_SCHEMA_ID, "value": TEST_SPLIT_ID},
-        {"schema_id": CAPTURE_DT_SCHEMA_ID, "value": time},
-        {"schema_id": TEXT_SCHEMA_ID, "value": msg},
+        {
+            "schema_id": constants["SPLIT_SCHEMA_ID"],
+            "value": constants["TEST_SPLIT_ID"],
+        },
+        {"schema_id": constants["CAPTURE_DT_SCHEMA_ID"], "value": time},
+        {"schema_id": constants["TEXT_SCHEMA_ID"], "value": msg},
     ]
     return fields
@@ -375,15 +360,22 @@ def test_create_data_row_with_invalid_input(dataset, image_url):
         dataset.create_data_row("asdf")
 
 
-def test_create_data_row_with_metadata(mdo, dataset, image_url):
+def test_create_data_row_with_metadata(
+    mdo,
+    dataset,
+    image_url,
+    make_metadata_fields,
+    constants,
+    make_metadata_fields_dict,
+):
     client = dataset.client
     assert len(list(dataset.data_rows())) == 0
 
     data_row = dataset.create_data_row(
-        row_data=image_url, metadata_fields=make_metadata_fields()
+        row_data=image_url, metadata_fields=make_metadata_fields
     )
 
-    assert len(list(dataset.data_rows())) == 1
+    assert len([dr for dr in dataset.data_rows()]) == 1
     assert data_row.dataset() == dataset
     assert data_row.created_by() == client.get_user()
     assert data_row.organization() == client.get_organization()
@@ -396,22 +388,24 @@
     metadata = data_row.metadata
     assert len(metadata_fields) == 3
     assert len(metadata) == 3
-    assert [
-        m["schemaId"] for m in metadata_fields
-    ].sort() == EXPECTED_METADATA_SCHEMA_IDS
+    assert [m["schemaId"] for m in metadata_fields].sort() == constants[
+        "EXPECTED_METADATA_SCHEMA_IDS"
+    ].sort()
     for m in metadata:
         assert mdo._parse_upsert(m)
 
 
-def test_create_data_row_with_metadata_dict(mdo, dataset, image_url):
+def test_create_data_row_with_metadata_dict(
+    mdo, dataset, image_url, constants, make_metadata_fields_dict
+):
     client = dataset.client
     assert len(list(dataset.data_rows())) == 0
 
     data_row = dataset.create_data_row(
-        row_data=image_url, metadata_fields=make_metadata_fields_dict()
+        row_data=image_url, metadata_fields=make_metadata_fields_dict
     )
 
-    assert len(list(dataset.data_rows())) == 1
+    assert len([dr for dr in dataset.data_rows()]) == 1
     assert data_row.dataset() == dataset
     assert data_row.created_by() == client.get_user()
     assert data_row.organization() == client.get_organization()
@@ -424,25 +418,36 @@
     metadata = data_row.metadata
     assert len(metadata_fields) == 3
     assert len(metadata) == 3
-    assert [
-        m["schemaId"] for m in metadata_fields
-    ].sort() == EXPECTED_METADATA_SCHEMA_IDS
+    assert [m["schemaId"] for m in metadata_fields].sort() == constants[
+        "EXPECTED_METADATA_SCHEMA_IDS"
+    ].sort()
     for m in metadata:
         assert mdo._parse_upsert(m)
 
 
-def test_create_data_row_with_invalid_metadata(dataset, image_url):
-    fields = make_metadata_fields()
+def test_create_data_row_with_invalid_metadata(
+    dataset, image_url, constants, make_metadata_fields
+):
+    fields = make_metadata_fields
     # make the payload invalid by providing the same schema id more than once
     fields.append(
-        DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value="some msg")
+        DataRowMetadataField(
+            schema_id=constants["TEXT_SCHEMA_ID"], value="some msg"
+        )
     )
 
     with pytest.raises(ResourceCreationError):
         dataset.create_data_row(row_data=image_url, metadata_fields=fields)
 
 
-def test_create_data_rows_with_metadata(mdo, dataset, image_url):
+def test_create_data_rows_with_metadata(
+    mdo,
+    dataset,
+    image_url,
+    constants,
+    make_metadata_fields,
+    make_metadata_fields_dict,
+):
     client = dataset.client
     assert len(list(dataset.data_rows())) == 0
@@ -451,22 +456,22 @@
             {
                 DataRow.row_data: image_url,
                 DataRow.external_id: "row1",
-                DataRow.metadata_fields: make_metadata_fields(),
+                DataRow.metadata_fields: make_metadata_fields,
             },
             {
                 DataRow.row_data: image_url,
                 DataRow.external_id: "row2",
-                "metadata_fields": make_metadata_fields(),
+                "metadata_fields": make_metadata_fields,
             },
             {
                 DataRow.row_data: image_url,
                 DataRow.external_id: "row3",
-                DataRow.metadata_fields: make_metadata_fields_dict(),
+                DataRow.metadata_fields: make_metadata_fields_dict,
             },
             {
                 DataRow.row_data: image_url,
                 DataRow.external_id: "row4",
-                "metadata_fields": make_metadata_fields_dict(),
+                "metadata_fields": make_metadata_fields_dict,
             },
         ]
     )
@@ -488,9 +493,9 @@
         metadata = row.metadata
         assert len(metadata_fields) == 3
         assert len(metadata) == 3
-        assert [
-            m["schemaId"] for m in metadata_fields
-        ].sort() == EXPECTED_METADATA_SCHEMA_IDS
+        assert [m["schemaId"] for m in metadata_fields].sort() == constants[
+            "EXPECTED_METADATA_SCHEMA_IDS"
+        ].sort()
         for m in metadata:
             assert mdo._parse_upsert(m)
@@ -505,14 +510,16 @@
     ],
 )
 def test_create_data_rows_with_named_metadata_field_class(
-    test_function, metadata_obj_type, mdo, dataset, image_url
+    test_function, metadata_obj_type, mdo, dataset, image_url, constants
 ):
     row_with_metadata_field = {
         DataRow.row_data: image_url,
         DataRow.external_id: "row1",
         DataRow.metadata_fields: [
             DataRowMetadataField(name="split", value="test"),
-            DataRowMetadataField(name=CUSTOM_TEXT_SCHEMA_NAME, value="hello"),
+            DataRowMetadataField(
+                name=constants["CUSTOM_TEXT_SCHEMA_NAME"], value="hello"
+            ),
         ],
     }
@@ -521,7 +528,7 @@
         DataRow.external_id: "row2",
         "metadata_fields": [
             {"name": "split", "value": "test"},
-            {"name": CUSTOM_TEXT_SCHEMA_NAME, "value": "hello"},
+            {"name": constants["CUSTOM_TEXT_SCHEMA_NAME"], "value": "hello"},
         ],
     }
@@ -552,21 +559,26 @@ def create_data_row(data_rows):
     assert len(created_rows[0].metadata) == 2
 
     metadata = created_rows[0].metadata
-    assert metadata[0].schema_id == SPLIT_SCHEMA_ID
+    assert metadata[0].schema_id == constants["SPLIT_SCHEMA_ID"]
     assert metadata[0].name == "test"
     assert metadata[0].value == mdo.reserved_by_name["split"]["test"].uid
-    assert metadata[1].name == CUSTOM_TEXT_SCHEMA_NAME
+    assert metadata[1].name == constants["CUSTOM_TEXT_SCHEMA_NAME"]
     assert metadata[1].value == "hello"
     assert (
-        metadata[1].schema_id == mdo.custom_by_name[CUSTOM_TEXT_SCHEMA_NAME].uid
+        metadata[1].schema_id
+        == mdo.custom_by_name[constants["CUSTOM_TEXT_SCHEMA_NAME"]].uid
     )
 
 
-def test_create_data_rows_with_invalid_metadata(dataset, image_url):
-    fields = make_metadata_fields()
+def test_create_data_rows_with_invalid_metadata(
+    dataset, image_url, constants, make_metadata_fields
+):
+    fields = make_metadata_fields
     # make the payload invalid by providing the same schema id more than once
     fields.append(
-        DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value="some msg")
+        DataRowMetadataField(
+            schema_id=constants["TEXT_SCHEMA_ID"], value="some msg"
+        )
     )
 
     task = dataset.create_data_rows(
@@ -577,13 +589,15 @@
     assert task.status == "COMPLETE"
     assert len(task.failed_data_rows) == 1
     assert (
-        f"A schemaId can only be specified once per DataRow : [{TEXT_SCHEMA_ID}]"
+        f"A schemaId can only be specified once per DataRow : [{constants['TEXT_SCHEMA_ID']}]"
         in task.failed_data_rows[0]["message"]
     )
 
 
-def test_create_data_rows_with_metadata_missing_value(dataset, image_url):
-    fields = make_metadata_fields()
+def test_create_data_rows_with_metadata_missing_value(
+    dataset, image_url, make_metadata_fields
+):
+    fields = make_metadata_fields
     fields.append({"schemaId": "some schema id"})
 
     with pytest.raises(ValueError) as exc:
@@ -598,8 +612,10 @@
     )
 
 
-def test_create_data_rows_with_metadata_missing_schema_id(dataset, image_url):
-    fields = make_metadata_fields()
+def test_create_data_rows_with_metadata_missing_schema_id(
+    dataset, image_url, make_metadata_fields
+):
+    fields = make_metadata_fields
     fields.append({"value": "some value"})
 
     with pytest.raises(ValueError) as exc:
@@ -614,8 +630,10 @@
     )
 
 
-def test_create_data_rows_with_metadata_wrong_type(dataset, image_url):
-    fields = make_metadata_fields()
+def test_create_data_rows_with_metadata_wrong_type(
+    dataset, image_url, make_metadata_fields
+):
+    fields = make_metadata_fields
     fields.append("Neither DataRowMetadataField or dict")
 
     with pytest.raises(ValueError) as exc:
@@ -899,7 +917,11 @@ def test_does_not_update_not_provided_attachment_fields(data_row):
     assert attachment.attachment_type == "RAW_TEXT"
 
 
-def test_create_data_rows_result(client, dataset, image_url):
+def test_create_data_rows_result(
+    client,
+    dataset,
+    image_url,
+):
     task = dataset.create_data_rows(
         [
             {
@@ -918,12 +940,14 @@
     client.get_data_row(result["id"])
 
 
-def test_create_data_rows_local_file(dataset, sample_image):
+def test_create_data_rows_local_file(
+    dataset, sample_image, make_metadata_fields
+):
     task = dataset.create_data_rows(
         [
             {
                 DataRow.row_data: sample_image,
-                DataRow.metadata_fields: make_metadata_fields(),
+                DataRow.metadata_fields: make_metadata_fields,
             }
         ]
     )
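
One pattern preserved by these rewrites is worth flagging: `list.sort()` returns `None`, so assertions of the form `[...].sort() == constants[...].sort()` compare `None == None` and always pass. A sketch of the stricter comparison, should the tests want a real check:

```python
# sorted() returns a new list, so this actually compares the schema ids.
assert sorted(m["schemaId"] for m in metadata_fields) == sorted(
    constants["EXPECTED_METADATA_SCHEMA_IDS"]
)
```
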
diff --git a/libs/labelbox/tests/integration/test_labeling_service.py b/libs/labelbox/tests/integration/test_labeling_service.py
index 9d5e178b6..e8b3d4cdc 100644
--- a/libs/labelbox/tests/integration/test_labeling_service.py
+++ b/libs/labelbox/tests/integration/test_labeling_service.py
@@ -1,7 +1,6 @@
 import pytest
 from lbox.exceptions import (
     LabelboxError,
-    MalformedQueryException,
     ResourceNotFoundError,
 )
 
@@ -43,12 +42,11 @@ def test_request_labeling_service_moe_offline_project(
 
 
 def test_request_labeling_service_moe_project(
-    rand_gen,
-    live_chat_evaluation_project_with_new_dataset,
+    live_chat_evaluation_project_with_batch,
     chat_evaluation_ontology,
     model_config,
 ):
-    project = live_chat_evaluation_project_with_new_dataset
+    project = live_chat_evaluation_project_with_batch
     project.connect_ontology(chat_evaluation_ontology)
 
     project.upsert_instructions("tests/integration/media/sample_pdf.pdf")
diff --git a/libs/labelbox/tests/integration/test_mmc_data_rows.py b/libs/labelbox/tests/integration/test_mmc_data_rows.py
new file mode 100644
index 000000000..2fa7bdd1b
--- /dev/null
+++ b/libs/labelbox/tests/integration/test_mmc_data_rows.py
@@ -0,0 +1,82 @@
+import json
+import random
+
+import pytest
+
+from labelbox.schema.data_row_payload_templates import ModelEvaluationTemplate
+
+
+@pytest.fixture
+def mmc_data_row(dataset):
+    data = ModelEvaluationTemplate()
+
+    content_all = data.model_dump(exclude_none=True)
+    task = dataset.create_data_rows([content_all])
+    task.wait_till_done()
+    assert task.status == "COMPLETE"
+
+    data_row = list(dataset.data_rows())[0]
+
+    yield data_row
+
+    data_row.delete()
+
+
+@pytest.fixture
+def mmc_data_row_all(dataset, make_metadata_fields, embedding):
+    data = ModelEvaluationTemplate()
+    data.row_data.rootMessageIds = ["root1"]
+    data.row_data.global_key = "global_key"
+    vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)]
+    data.embeddings = [{"embedding_id": embedding.id, "vector": vector}]
+    data.metadata_fields = make_metadata_fields
+    data.attachments = [{"type": "RAW_TEXT", "value": "attachment value"}]
+
+    content_all = data.model_dump(exclude_none=True)
+    task = dataset.create_data_rows([content_all])
+    task.wait_till_done()
+    assert task.status == "COMPLETE"
+
+    data_row = list(dataset.data_rows())[0]
+
+    yield data_row
+
+    data_row.delete()
+
+
+def test_mmc(mmc_data_row):
+    data_row = mmc_data_row
+    assert json.loads(data_row.row_data) == {
+        "type": "application/vnd.labelbox.conversational.model-chat-evaluation",
+        "draft": True,
+        "rootMessageIds": [],
+        "actors": {},
+        "messages": {},
+        "version": 2,
+    }
+
+
+def test_mmc_all(mmc_data_row_all, embedding, constants):
+    data_row = mmc_data_row_all
+    assert json.loads(data_row.row_data) == {
+        "type": "application/vnd.labelbox.conversational.model-chat-evaluation",
+        "draft": True,
+        "rootMessageIds": ["root1"],
+        "actors": {},
+        "messages": {},
+        "version": 2,
+        "globalKey": "global_key",
+    }
+
+    metadata_fields = data_row.metadata_fields
+    metadata = data_row.metadata
+    assert len(metadata_fields) == 3
+    assert len(metadata) == 3
+    assert [m["schemaId"] for m in metadata_fields].sort() == constants[
+        "EXPECTED_METADATA_SCHEMA_IDS"
+    ].sort()
+
+    attachments = list(data_row.attachments())
+    assert len(attachments) == 1
+
+    assert embedding.get_imported_vector_count() == 1
diff --git a/libs/labelbox/tests/integration/test_project_model_config.py b/libs/labelbox/tests/integration/test_project_model_config.py
index f86bbb38e..f1646dfc0 100644
--- a/libs/labelbox/tests/integration/test_project_model_config.py
+++ b/libs/labelbox/tests/integration/test_project_model_config.py
@@ -2,10 +2,8 @@
 from lbox.exceptions import ResourceNotFoundError
 
 
-def test_add_single_model_config(
-    live_chat_evaluation_project_with_new_dataset, model_config
-):
-    configured_project = live_chat_evaluation_project_with_new_dataset
+def test_add_single_model_config(live_chat_evaluation_project, model_config):
+    configured_project = live_chat_evaluation_project
     project_model_config_id = configured_project.add_model_config(
         model_config.uid
     )
@@ -22,11 +20,11 @@
 def test_add_multiple_model_config(
     client,
     rand_gen,
-    live_chat_evaluation_project_with_new_dataset,
+    live_chat_evaluation_project,
     model_config,
     valid_model_id,
 ):
-    configured_project = live_chat_evaluation_project_with_new_dataset
+    configured_project = live_chat_evaluation_project
     second_model_config = client.create_model_config(
         rand_gen(str), valid_model_id, {"param": "value"}
     )
@@ -52,9 +50,9 @@
 
 
 def test_delete_project_model_config(
-    live_chat_evaluation_project_with_new_dataset, model_config
+    live_chat_evaluation_project, model_config
 ):
-    configured_project = live_chat_evaluation_project_with_new_dataset
+    configured_project = live_chat_evaluation_project
     assert configured_project.delete_project_model_config(
         configured_project.add_model_config(model_config.uid)
     )
diff --git a/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py b/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py
index 8872a27f4..30e179028 100644
--- a/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py
+++ b/libs/labelbox/tests/integration/test_project_set_model_setup_complete.py
@@ -3,9 +3,9 @@
 
 
 def test_live_chat_evaluation_project(
-    live_chat_evaluation_project_with_new_dataset, model_config
+    live_chat_evaluation_project, model_config
 ):
-    project = live_chat_evaluation_project_with_new_dataset
+    project = live_chat_evaluation_project
 
     project.set_project_model_setup_complete()
     assert bool(project.model_setup_complete) is True
@@ -18,9 +18,9 @@
 
 
 def test_live_chat_evaluation_project_delete_cofig(
-    live_chat_evaluation_project_with_new_dataset, model_config
+    live_chat_evaluation_project, model_config
 ):
-    project = live_chat_evaluation_project_with_new_dataset
+    project = live_chat_evaluation_project
     project_model_config_id = project.add_model_config(model_config.uid)
     assert project_model_config_id
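
The renamed fixture leaves the model-config lifecycle unchanged in substance. A condensed sketch of that lifecycle against a live chat evaluation project, mirroring the tests above; the config name is illustrative:

```python
model_config = client.create_model_config(
    "demo-config",        # hypothetical name
    valid_model_id,       # id of an available model
    {"param": "value"},
)
pmc_id = project.add_model_config(model_config.uid)
project.set_project_model_setup_complete()
assert bool(project.model_setup_complete) is True
```
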