From 553647dfa4fa2cf0b9ed51c6cb01ded4ea214a8c Mon Sep 17 00:00:00 2001
From: Val Brodsky
Date: Thu, 1 Aug 2024 08:34:58 -0700
Subject: [PATCH 1/4] Add LabelingService request

---
 .../src/labelbox/schema/labeling_service.py   | 37 ++++++++++++++++---
 libs/labelbox/src/labelbox/schema/project.py  |  9 +++++
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/libs/labelbox/src/labelbox/schema/labeling_service.py b/libs/labelbox/src/labelbox/schema/labeling_service.py
index 69da03674..3ebbdb11e 100644
--- a/libs/labelbox/src/labelbox/schema/labeling_service.py
+++ b/libs/labelbox/src/labelbox/schema/labeling_service.py
@@ -10,12 +10,13 @@
 Cuid = Annotated[str, Field(min_length=25, max_length=25)]
 
+
 class LabelingServiceStatus(Enum):
-    Accepted = 'ACCEPTED',
-    Calibration = 'CALIBRATION',
-    Complete = 'COMPLETE',
-    Production = 'PRODUCTION',
-    Requested = 'REQUESTED',
+    Accepted = 'ACCEPTED'
+    Calibration = 'CALIBRATION'
+    Complete = 'COMPLETE'
+    Production = 'PRODUCTION'
+    Requested = 'REQUESTED'
     SetUp = 'SET_UP'
 
@@ -40,7 +41,7 @@ class Config(_CamelCaseMixin.Config):
     @classmethod
     def start(cls, client, project_id: Cuid) -> 'LabelingService':
         """
-        Starts the labeling service for the project. This is equivalent to a UI acction to Request Specialized Labelers
+        Starts the labeling service for the project. This is equivalent to a UI action to Request Specialized Labelers
 
         Returns:
             LabelingService: The labeling service for the project.
@@ -58,6 +59,30 @@ def start(cls, client, project_id: Cuid) -> 'LabelingService':
             raise Exception("Failed to start labeling service")
         return cls.get(client, project_id)
 
+    def request(self) -> 'LabelingService':
+        """
+        Starts the labeling service for the project. This is equivalent to a UI action to Request Specialized Labelers
+
+        Returns:
+            LabelingService: The labeling service for the project.
+        Raises:
+            Exception: If the service fails to start.
+        """
+
+        query_str = """mutation ValidateAndRequestProjectBoostWorkforcePyApi($projectId: ID!) {
+          validateAndRequestProjectBoostWorkforce(
+            data: { projectId: $projectId }
+          ) {
+            success
+          }
+        }
+        """
+        result = self.client.execute(query_str, {"projectId": self.project_id})
+        success = result["validateAndRequestProjectBoostWorkforce"]["success"]
+        if not success:
+            raise Exception("Failed to start labeling service")
+        return LabelingService.get(self.client, self.project_id)
+
     @classmethod
     def get(cls, client, project_id: Cuid) -> 'LabelingService':
         """
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py
index 761d0e391..2ca0ea82a 100644
--- a/libs/labelbox/src/labelbox/schema/project.py
+++ b/libs/labelbox/src/labelbox/schema/project.py
@@ -1949,6 +1949,15 @@ def request_labeling_service(self) -> LabelingService:
         """
         return LabelingService.start(self.client, self.uid)  # type: ignore
 
+    @experimental
+    def start_labeling_service(self) -> LabelingService:
+        """Submit a request to start the labeling service for this project.
+
+        Returns:
+            LabelingService: The labeling service for this project.
+        """
+        return LabelingService.start(self.client, self.uid)  # type: ignore
+
 
 class ProjectMember(DbObject):
     user = Relationship.ToOne("User", cache=True)
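Taken together, patch 1 exposes the new ValidateAndRequestProjectBoostWorkforce mutation from both `Project` and `LabelingService`. A minimal usage sketch; the API key and project ID placeholders below are hypothetical, not values from this PR:

```python
# Sketch only: <YOUR_API_KEY> and <PROJECT_ID> are placeholders.
from labelbox import Client

client = Client(api_key="<YOUR_API_KEY>")
project = client.get_project("<PROJECT_ID>")

# request_labeling_service() returns the project's LabelingService;
# request() then fires the new ValidateAndRequestProjectBoostWorkforce
# mutation and returns a refreshed LabelingService.
labeling_service = project.request_labeling_service()
labeling_service = labeling_service.request()
```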
+ """ + return LabelingService.start(self.client, self.uid) # type: ignore + class ProjectMember(DbObject): user = Relationship.ToOne("User", cache=True) From 3140eab38a8c773a86eb970ff7ed778c17305ac7 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Thu, 1 Aug 2024 14:17:04 -0700 Subject: [PATCH 2/4] Optionally allow Client to raise RESOURCE_NOT_FOUND errors --- libs/labelbox/src/labelbox/client.py | 58 +++++++++++-------- .../src/labelbox/schema/labeling_service.py | 3 +- libs/labelbox/src/labelbox/schema/project.py | 9 --- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index 6e05721dc..86c2f86e2 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -145,7 +145,8 @@ def execute(self, files=None, timeout=60.0, experimental=False, - error_log_key="message"): + error_log_key="message", + raise_return_resource_not_found=False): """ Sends a request to the server for the execution of the given query. @@ -297,9 +298,13 @@ def get_error_status_code(error: dict) -> int: resource_not_found_error = check_errors(["RESOURCE_NOT_FOUND"], "extensions", "code") if resource_not_found_error is not None: - # Return None and let the caller methods raise an exception - # as they already know which resource type and ID was requested - return None + if raise_return_resource_not_found: + raise labelbox.exceptions.ResourceNotFoundError( + message=resource_not_found_error["message"]) + else: + # Return None and let the caller methods raise an exception + # as they already know which resource type and ID was requested + return None resource_conflict_error = check_errors(["RESOURCE_CONFLICT"], "extensions", "code") @@ -875,12 +880,12 @@ def create_offline_model_evaluation_project(self, **kwargs) -> Project: return self._create_project(**kwargs) - - def create_prompt_response_generation_project(self, - dataset_id: Optional[str] = None, - dataset_name: Optional[str] = None, - data_row_count: int = 100, - **kwargs) -> Project: + def create_prompt_response_generation_project( + self, + dataset_id: Optional[str] = None, + dataset_name: Optional[str] = None, + data_row_count: int = 100, + **kwargs) -> Project: """ Use this method exclusively to create a prompt and response generation project. @@ -915,8 +920,7 @@ def create_prompt_response_generation_project(self, if dataset_id and dataset_name: raise ValueError( - "Only provide a dataset_name or dataset_id, not both." 
@@ -915,8 +920,7 @@ def create_prompt_response_generation_project(self,
 
         if dataset_id and dataset_name:
             raise ValueError(
-                "Only provide a dataset_name or dataset_id, not both."
-            )
+                "Only provide a dataset_name or dataset_id, not both.")
 
         if data_row_count <= 0:
             raise ValueError("data_row_count must be a positive integer.")
@@ -928,7 +932,9 @@ def create_prompt_response_generation_project(self,
             append_to_existing_dataset = False
             dataset_name_or_id = dataset_name
 
-        if "media_type" in kwargs and kwargs.get("media_type") not in [MediaType.LLMPromptCreation, MediaType.LLMPromptResponseCreation]:
+        if "media_type" in kwargs and kwargs.get("media_type") not in [
+                MediaType.LLMPromptCreation, MediaType.LLMPromptResponseCreation
+        ]:
             raise ValueError(
                 "media_type must be either LLMPromptCreation or LLMPromptResponseCreation"
             )
@@ -949,8 +955,7 @@ def create_response_creation_project(self, **kwargs) -> Project:
         Returns:
             Project: The created project
         """
-        kwargs[
-            "media_type"] = MediaType.Text  # Only Text is supported
+        kwargs["media_type"] = MediaType.Text  # Only Text is supported
         kwargs[
             "editor_task_type"] = EditorTaskType.ResponseCreation.value  # Special editor task type for response creation projects
 
@@ -1005,7 +1010,8 @@ def _create_project(self, **kwargs) -> Project:
 
         if quality_modes and quality_mode:
             raise ValueError(
-                "Cannot use both quality_modes and quality_mode at the same time. Use one or the other.")
+                "Cannot use both quality_modes and quality_mode at the same time. Use one or the other."
+            )
 
         if not quality_modes and not quality_mode:
             logger.info("Defaulting quality modes to Benchmark and Consensus.")
@@ -1021,12 +1027,11 @@ def _create_project(self, **kwargs) -> Project:
         if quality_mode:
             quality_modes_set = {quality_mode}
 
-        if (
-            quality_modes_set is None
-            or len(quality_modes_set) == 0
-            or quality_modes_set == {QualityMode.Benchmark, QualityMode.Consensus}
-        ):
-            data["auto_audit_number_of_labels"] = CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS
+        if (quality_modes_set is None or len(quality_modes_set) == 0 or
+                quality_modes_set
+                == {QualityMode.Benchmark, QualityMode.Consensus}):
+            data[
+                "auto_audit_number_of_labels"] = CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS
             data["auto_audit_percentage"] = CONSENSUS_AUTO_AUDIT_PERCENTAGE
             data["is_benchmark_enabled"] = True
             data["is_consensus_enabled"] = True
@@ -1297,10 +1302,12 @@ def create_ontology_from_feature_schemas(
                     f"Tool `{tool}` not in list of supported tools.")
             elif 'type' in feature_schema.normalized:
                 classification = feature_schema.normalized['type']
-                if classification in Classification.Type._value2member_map_.keys():
+                if classification in Classification.Type._value2member_map_.keys(
+                ):
                     Classification.Type(classification)
                     classifications.append(feature_schema.normalized)
-                elif classification in PromptResponseClassification.Type._value2member_map_.keys():
+                elif classification in PromptResponseClassification.Type._value2member_map_.keys(
+                ):
                     PromptResponseClassification.Type(classification)
                     classifications.append(feature_schema.normalized)
                 else:
@@ -1518,7 +1525,8 @@ def create_ontology(self,
             raise get_media_type_validation_error(media_type)
 
         if ontology_kind and OntologyKind.is_supported(ontology_kind):
-            media_type = OntologyKind.evaluate_ontology_kind_with_media_type(ontology_kind, media_type)
+            media_type = OntologyKind.evaluate_ontology_kind_with_media_type(
+                ontology_kind, media_type)
             editor_task_type_value = EditorTaskTypeMapper.to_editor_task_type(
                 ontology_kind, media_type).value
         elif ontology_kind:
diff --git a/libs/labelbox/src/labelbox/schema/labeling_service.py b/libs/labelbox/src/labelbox/schema/labeling_service.py
index 3ebbdb11e..8c39daa49 100644
--- a/libs/labelbox/src/labelbox/schema/labeling_service.py
+++ b/libs/labelbox/src/labelbox/schema/labeling_service.py
@@ -77,7 +77,8 @@ def request(self) -> 'LabelingService':
             }
         }
         """
-        result = self.client.execute(query_str, {"projectId": self.project_id})
+        result = self.client.execute(query_str, {"projectId": self.project_id},
+                                     raise_return_resource_not_found=True)
         success = result["validateAndRequestProjectBoostWorkforce"]["success"]
         if not success:
             raise Exception("Failed to start labeling service")
diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py
index 2ca0ea82a..761d0e391 100644
--- a/libs/labelbox/src/labelbox/schema/project.py
+++ b/libs/labelbox/src/labelbox/schema/project.py
@@ -1949,15 +1949,6 @@ def request_labeling_service(self) -> LabelingService:
         """
         return LabelingService.start(self.client, self.uid)  # type: ignore
 
-    @experimental
-    def start_labeling_service(self) -> LabelingService:
-        """Submit a request to start the labeling service for this project.
-
-        Returns:
-            LabelingService: The labeling service for this project.
-        """
-        return LabelingService.start(self.client, self.uid)  # type: ignore
-
 
 class ProjectMember(DbObject):
     user = Relationship.ToOne("User", cache=True)
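The new `raise_return_resource_not_found` flag switches `execute()` from a return-None contract to a raising contract. A sketch of the two calling conventions, assuming `client` is a `labelbox.Client` and `query_str` and `params` are already in scope:

```python
from labelbox.exceptions import ResourceNotFoundError

# Default: execute() swallows RESOURCE_NOT_FOUND and returns None, so the
# caller must detect the miss itself (pre-existing behavior).
result = client.execute(query_str, params)
if result is None:
    # Caller knows which resource type and ID was requested and raises
    # its own, resource-specific error here.
    pass

# Opt-in behavior added by this patch: the error surfaces directly.
try:
    result = client.execute(query_str, params,
                            raise_return_resource_not_found=True)
except ResourceNotFoundError as error:
    print(f"Resource not found: {error}")
```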
From d5dbb382d564dbf4df7ceda3ab2fcdc67dbaecda Mon Sep 17 00:00:00 2001
From: Val Brodsky
Date: Thu, 1 Aug 2024 14:20:18 -0700
Subject: [PATCH 3/4] Add integration tests

---
 libs/labelbox/tests/conftest.py               | 26 +++++--
 .../integration/test_labeling_service.py      | 69 ++++++++++++++++++-
 2 files changed, 87 insertions(+), 8 deletions(-)

diff --git a/libs/labelbox/tests/conftest.py b/libs/labelbox/tests/conftest.py
index db47cc071..eea9851be 100644
--- a/libs/labelbox/tests/conftest.py
+++ b/libs/labelbox/tests/conftest.py
@@ -130,8 +130,7 @@ def rest_url(environ: str) -> str:
 def testing_api_key(environ: Environ) -> str:
     keys = [
         f"LABELBOX_TEST_API_KEY_{environ.value.upper()}",
-        "LABELBOX_TEST_API_KEY",
-        "LABELBOX_API_KEY"
+        "LABELBOX_TEST_API_KEY", "LABELBOX_API_KEY"
     ]
     for key in keys:
         value = os.environ.get(key)
@@ -318,11 +317,7 @@ def environ() -> Environ:
     'prod' or 'staging'
     Make sure to set LABELBOX_TEST_ENVIRON in .github/workflows/python-package.yaml
     """
-    keys = [
-        "LABELBOX_TEST_ENV",
-        "LABELBOX_TEST_ENVIRON",
-        "LABELBOX_ENV"
-    ]
+    keys = ["LABELBOX_TEST_ENV", "LABELBOX_TEST_ENVIRON", "LABELBOX_ENV"]
     for key in keys:
         value = os.environ.get(key)
         if value is not None:
@@ -742,6 +737,23 @@ def configured_batch_project_with_multiple_datarows(project, dataset,
                                                     data_rows,
         label.delete()
 
+
+@pytest.fixture
+def configured_batch_project_for_labeling_service(project,
+                                                  data_row_and_global_key):
+    """Project with a batch containing a single data row (by global key)
+    and a configured ontology, ready for a labeling service request.
+    Add a create_label helper here if extra labels are ever needed.
+    """
+    global_keys = [data_row_and_global_key[1]]
+
+    batch_name = f'batch {uuid.uuid4()}'
+    project.create_batch(batch_name, global_keys=global_keys)
+
+    _setup_ontology(project)
+
+    yield project
+
+
 # NOTE this is nice heuristics, also there is this logic _wait_until_data_rows_are_processed in Project
 # in case we still have flakiness in the future, we can use it
 @pytest.fixture
diff --git a/libs/labelbox/tests/integration/test_labeling_service.py b/libs/labelbox/tests/integration/test_labeling_service.py
index ccfd0b15c..a35777fe9 100644
--- a/libs/labelbox/tests/integration/test_labeling_service.py
+++ b/libs/labelbox/tests/integration/test_labeling_service.py
@@ -1,6 +1,6 @@
 import pytest
 
-from labelbox.exceptions import ResourceNotFoundError
+from labelbox.exceptions import LabelboxError, ResourceNotFoundError
 from labelbox.schema.labeling_service import LabelingServiceStatus
 
 
@@ -23,3 +23,70 @@ def test_start_labeling_service(project):
 
     labeling_service_status = project.get_labeling_service_status()
     assert labeling_service_status == LabelingServiceStatus.SetUp
+
+
+def test_request_labeling_service(
+        configured_batch_project_for_labeling_service):
+    project = configured_batch_project_for_labeling_service
+
+    project.upsert_instructions('tests/integration/media/sample_pdf.pdf')
+
+    labeling_service = project.request_labeling_service(
+    )  # project fixture is an Image type project
+    labeling_service.request()
+    assert project.get_labeling_service_status(
+    ) == LabelingServiceStatus.Requested
+
+
+def test_request_labeling_service_moe_offline_project(
+        rand_gen, offline_chat_evaluation_project, chat_evaluation_ontology,
+        offline_conversational_data_row, model_config):
+    project = offline_chat_evaluation_project
+    project.connect_ontology(chat_evaluation_ontology)
+
+    project.create_batch(
+        rand_gen(str),
+        [offline_conversational_data_row.uid],  # sample of data row objects
+    )
+
+    project.upsert_instructions('tests/integration/media/sample_pdf.pdf')
+
+    labeling_service = project.request_labeling_service()
+    labeling_service.request()
+    assert project.get_labeling_service_status(
+    ) == LabelingServiceStatus.Requested
+
+
+def test_request_labeling_service_moe_project(
+        rand_gen, live_chat_evaluation_project_with_new_dataset,
+        chat_evaluation_ontology, model_config):
+    project = live_chat_evaluation_project_with_new_dataset
+    project.connect_ontology(chat_evaluation_ontology)
+
+    project.upsert_instructions('tests/integration/media/sample_pdf.pdf')
+
+    labeling_service = project.request_labeling_service()
+    with pytest.raises(
+            LabelboxError,
+            match=
+            '[{"errorType":"PROJECT_MODEL_CONFIG","errorMessage":"Project model config is not completed"}]'
+    ):
+        labeling_service.request()
+    project.add_model_config(model_config.uid)
+    project.set_project_model_setup_complete()
+
+    labeling_service.request()
+    assert project.get_labeling_service_status(
+    ) == LabelingServiceStatus.Requested
+
+
+def test_request_labeling_service_incomplete_requirements(project, ontology):
+    labeling_service = project.request_labeling_service(
+    )  # project fixture is an Image type project
+    with pytest.raises(ResourceNotFoundError,
+                       match="Associated ontology id could not be found"
+                      ):  # No labeling service by default
+        labeling_service.request()
+    project.connect_ontology(ontology)
+    with pytest.raises(LabelboxError):
+        labeling_service.request()
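One note on the error-path assertion in `test_request_labeling_service_moe_project`: `pytest.raises(match=...)` interprets its argument as a regular expression, and the literal JSON payload is full of `[`, `{`, and `}` metacharacters. A sketch of a stricter form of the same assertion using `re.escape`, assuming the same `labeling_service` object the test builds:

```python
import re

import pytest
from labelbox.exceptions import LabelboxError

# Escape the literal payload so regex metacharacters match verbatim.
expected = ('[{"errorType":"PROJECT_MODEL_CONFIG",'
            '"errorMessage":"Project model config is not completed"}]')
with pytest.raises(LabelboxError, match=re.escape(expected)):
    labeling_service.request()
```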
From d029700c901e9e71e0d08420d30b577e250584b2 Mon Sep 17 00:00:00 2001
From: Val Brodsky
Date: Thu, 1 Aug 2024 14:37:06 -0700
Subject: [PATCH 4/4] Add docstring

---
 libs/labelbox/src/labelbox/schema/labeling_service.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libs/labelbox/src/labelbox/schema/labeling_service.py b/libs/labelbox/src/labelbox/schema/labeling_service.py
index 8c39daa49..3b0ef9445 100644
--- a/libs/labelbox/src/labelbox/schema/labeling_service.py
+++ b/libs/labelbox/src/labelbox/schema/labeling_service.py
@@ -61,12 +61,15 @@ def start(cls, client, project_id: Cuid) -> 'LabelingService':
 
     def request(self) -> 'LabelingService':
         """
-        Starts the labeling service for the project. This is equivalent to a UI action to Request Specialized Labelers
+        Submits a request to the labeling service to start labeling for the project.
+        The backend validates that the project is ready for labeling before requesting the service.
 
         Returns:
             LabelingService: The labeling service for the project.
         Raises:
-            Exception: If the service fails to start.
+            ResourceNotFoundError: If no ontology is associated with the project
+                or if any of the project's required prerequisites are missing.
+
         """
 
         query_str = """mutation ValidateAndRequestProjectBoostWorkforcePyApi($projectId: ID!) {
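With the docstring finalized, downstream code can branch on the documented failure mode. A sketch, assuming `project` and `ontology` objects like those used in the integration tests:

```python
from labelbox.exceptions import ResourceNotFoundError

labeling_service = project.request_labeling_service()
try:
    labeling_service.request()
except ResourceNotFoundError:
    # Raised when the ontology (or another prerequisite) is missing;
    # connect the ontology and retry the request.
    project.connect_ontology(ontology)
    labeling_service.request()
```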