[PLT-1347] Vb/request labelin service #1761

Merged
merged 4 commits on Aug 6, 2024
58 changes: 33 additions & 25 deletions libs/labelbox/src/labelbox/client.py
@@ -145,7 +145,8 @@ def execute(self,
files=None,
timeout=60.0,
experimental=False,
error_log_key="message"):
error_log_key="message",
raise_return_resource_not_found=False):
""" Sends a request to the server for the execution of the
given query.

@@ -297,9 +298,13 @@ def get_error_status_code(error: dict) -> int:
resource_not_found_error = check_errors(["RESOURCE_NOT_FOUND"],
"extensions", "code")
if resource_not_found_error is not None:
# Return None and let the caller methods raise an exception
# as they already know which resource type and ID was requested
return None
if raise_return_resource_not_found:
raise labelbox.exceptions.ResourceNotFoundError(
message=resource_not_found_error["message"])
else:
# Return None and let the caller methods raise an exception
# as they already know which resource type and ID was requested
return None

resource_conflict_error = check_errors(["RESOURCE_CONFLICT"],
"extensions", "code")
@@ -875,12 +880,12 @@ def create_offline_model_evaluation_project(self, **kwargs) -> Project:

return self._create_project(**kwargs)


def create_prompt_response_generation_project(self,
dataset_id: Optional[str] = None,
dataset_name: Optional[str] = None,
data_row_count: int = 100,
**kwargs) -> Project:
def create_prompt_response_generation_project(
self,
dataset_id: Optional[str] = None,
dataset_name: Optional[str] = None,
data_row_count: int = 100,
**kwargs) -> Project:
"""
Use this method exclusively to create a prompt and response generation project.

@@ -915,8 +920,7 @@ def create_prompt_response_generation_project(self,

if dataset_id and dataset_name:
raise ValueError(
"Only provide a dataset_name or dataset_id, not both."
)
"Only provide a dataset_name or dataset_id, not both.")

if data_row_count <= 0:
raise ValueError("data_row_count must be a positive integer.")
@@ -928,7 +932,9 @@
append_to_existing_dataset = False
dataset_name_or_id = dataset_name

if "media_type" in kwargs and kwargs.get("media_type") not in [MediaType.LLMPromptCreation, MediaType.LLMPromptResponseCreation]:
if "media_type" in kwargs and kwargs.get("media_type") not in [
MediaType.LLMPromptCreation, MediaType.LLMPromptResponseCreation
]:
raise ValueError(
"media_type must be either LLMPromptCreation or LLMPromptResponseCreation"
)
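For reference, a minimal sketch of a call to `create_prompt_response_generation_project` that satisfies the checks above; the client setup, project name, dataset name, and row count are placeholders, not values from this PR:

```python
from labelbox import Client
from labelbox.schema.media_type import MediaType

client = Client(api_key="<YOUR_API_KEY>")  # placeholder key

# Pass either dataset_name or dataset_id (never both), a positive
# data_row_count, and one of the two allowed media types.
project = client.create_prompt_response_generation_project(
    name="prompt-response-demo",                  # placeholder project name
    dataset_name="prompt-response-demo-dataset",  # or dataset_id=..., not both
    data_row_count=25,                            # must be a positive integer
    media_type=MediaType.LLMPromptResponseCreation,
)
```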
@@ -949,8 +955,7 @@ def create_response_creation_project(self, **kwargs) -> Project:
Returns:
Project: The created project
"""
kwargs[
"media_type"] = MediaType.Text # Only Text is supported
kwargs["media_type"] = MediaType.Text # Only Text is supported
kwargs[
"editor_task_type"] = EditorTaskType.ResponseCreation.value # Special editor task type for response creation projects

@@ -1005,7 +1010,8 @@ def _create_project(self, **kwargs) -> Project:

if quality_modes and quality_mode:
raise ValueError(
"Cannot use both quality_modes and quality_mode at the same time. Use one or the other.")
"Cannot use both quality_modes and quality_mode at the same time. Use one or the other."
)

if not quality_modes and not quality_mode:
logger.info("Defaulting quality modes to Benchmark and Consensus.")
@@ -1021,12 +1027,11 @@ def _create_project(self, **kwargs) -> Project:
if quality_mode:
quality_modes_set = {quality_mode}

if (
quality_modes_set is None
or len(quality_modes_set) == 0
or quality_modes_set == {QualityMode.Benchmark, QualityMode.Consensus}
):
data["auto_audit_number_of_labels"] = CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS
if (quality_modes_set is None or len(quality_modes_set) == 0 or
quality_modes_set
== {QualityMode.Benchmark, QualityMode.Consensus}):
data[
"auto_audit_number_of_labels"] = CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS
data["auto_audit_percentage"] = CONSENSUS_AUTO_AUDIT_PERCENTAGE
data["is_benchmark_enabled"] = True
data["is_consensus_enabled"] = True
@@ -1297,10 +1302,12 @@ def create_ontology_from_feature_schemas(
f"Tool `{tool}` not in list of supported tools.")
elif 'type' in feature_schema.normalized:
classification = feature_schema.normalized['type']
if classification in Classification.Type._value2member_map_.keys():
if classification in Classification.Type._value2member_map_.keys(
):
Classification.Type(classification)
classifications.append(feature_schema.normalized)
elif classification in PromptResponseClassification.Type._value2member_map_.keys():
elif classification in PromptResponseClassification.Type._value2member_map_.keys(
):
PromptResponseClassification.Type(classification)
classifications.append(feature_schema.normalized)
else:
@@ -1518,7 +1525,8 @@ def create_ontology(self,
raise get_media_type_validation_error(media_type)

if ontology_kind and OntologyKind.is_supported(ontology_kind):
media_type = OntologyKind.evaluate_ontology_kind_with_media_type(ontology_kind, media_type)
media_type = OntologyKind.evaluate_ontology_kind_with_media_type(
ontology_kind, media_type)
editor_task_type_value = EditorTaskTypeMapper.to_editor_task_type(
ontology_kind, media_type).value
elif ontology_kind:
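To illustrate the new `raise_return_resource_not_found` flag on `Client.execute`, here is a rough sketch of how a caller opts in; the query, variables, and API key are illustrative placeholders rather than code from this PR:

```python
from labelbox import Client
from labelbox.exceptions import ResourceNotFoundError

client = Client(api_key="<YOUR_API_KEY>")  # placeholder key

query = """query GetProjectPyApi($id: ID!) {
    project(where: {id: $id}) { id name }
}"""  # illustrative query only

try:
    # With the flag set, a RESOURCE_NOT_FOUND error from the API is raised
    # here instead of execute() returning None for the caller to interpret.
    result = client.execute(query, {"id": "<project_id>"},
                            raise_return_resource_not_found=True)
except ResourceNotFoundError as err:
    print(f"Resource not found: {err}")
```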
41 changes: 35 additions & 6 deletions libs/labelbox/src/labelbox/schema/labeling_service.py
@@ -10,12 +10,13 @@

Cuid = Annotated[str, Field(min_length=25, max_length=25)]


class LabelingServiceStatus(Enum):
Accepted = 'ACCEPTED',
Calibration = 'CALIBRATION',
Complete = 'COMPLETE',
Production = 'PRODUCTION',
Requested = 'REQUESTED',
Accepted = 'ACCEPTED'
Calibration = 'CALIBRATION'
Complete = 'COMPLETE'
Production = 'PRODUCTION'
Requested = 'REQUESTED'
SetUp = 'SET_UP'


@@ -40,7 +41,7 @@ class Config(_CamelCaseMixin.Config):
@classmethod
def start(cls, client, project_id: Cuid) -> 'LabelingService':
"""
Starts the labeling service for the project. This is equivalent to a UI acction to Request Specialized Labelers
Starts the labeling service for the project. This is equivalent to a UI action to Request Specialized Labelers

Returns:
LabelingService: The labeling service for the project.
@@ -58,6 +59,34 @@ def start(cls, client, project_id: Cuid) -> 'LabelingService':
raise Exception("Failed to start labeling service")
return cls.get(client, project_id)

def request(self) -> 'LabelingService':
"""
Creates a request to the labeling service to start labeling for the project.
Our backend will validate that the project is ready for labeling and then request the labeling service.

Returns:
LabelingService: The labeling service for the project.
Raises:
ResourceNotFoundError: If an ontology is not associated with the project
or if any of the project's required prerequisites are missing.

"""

query_str = """mutation ValidateAndRequestProjectBoostWorkforcePyApi($projectId: ID!) {
validateAndRequestProjectBoostWorkforce(
data: { projectId: $projectId }
) {
success
}
}
"""
result = self.client.execute(query_str, {"projectId": self.project_id},
raise_return_resource_not_found=True)
success = result["validateAndRequestProjectBoostWorkforce"]["success"]
if not success:
raise Exception("Failed to start labeling service")
return LabelingService.get(self.client, self.project_id)

@classmethod
def get(cls, client, project_id: Cuid) -> 'LabelingService':
"""
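Taken together with the integration tests below, the intended end-to-end flow looks roughly like this; the client setup and project id are placeholders, and the ontology and instructions prerequisites are assumed to already be in place:

```python
from labelbox import Client
from labelbox.exceptions import ResourceNotFoundError
from labelbox.schema.labeling_service import LabelingServiceStatus

client = Client(api_key="<YOUR_API_KEY>")     # placeholder key
project = client.get_project("<project_id>")  # placeholder id

# Set up the labeling service for the project, then ask the backend to
# validate prerequisites and request specialized labelers.
labeling_service = project.request_labeling_service()
try:
    labeling_service.request()
except ResourceNotFoundError:
    # Raised when a prerequisite is missing, e.g. no ontology is attached.
    raise

assert project.get_labeling_service_status() == LabelingServiceStatus.Requested
```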
26 changes: 19 additions & 7 deletions libs/labelbox/tests/conftest.py
@@ -130,8 +130,7 @@ def rest_url(environ: str) -> str:
def testing_api_key(environ: Environ) -> str:
keys = [
f"LABELBOX_TEST_API_KEY_{environ.value.upper()}",
"LABELBOX_TEST_API_KEY",
"LABELBOX_API_KEY"
"LABELBOX_TEST_API_KEY", "LABELBOX_API_KEY"
]
for key in keys:
value = os.environ.get(key)
@@ -318,11 +317,7 @@ def environ() -> Environ:
'prod' or 'staging'
Make sure to set LABELBOX_TEST_ENVIRON in .github/workflows/python-package.yaml
"""
keys = [
"LABELBOX_TEST_ENV",
"LABELBOX_TEST_ENVIRON",
"LABELBOX_ENV"
]
keys = ["LABELBOX_TEST_ENV", "LABELBOX_TEST_ENVIRON", "LABELBOX_ENV"]
for key in keys:
value = os.environ.get(key)
if value is not None:
@@ -742,6 +737,23 @@ def configured_batch_project_with_multiple_datarows(project, dataset, data_rows,
label.delete()


@pytest.fixture
def configured_batch_project_for_labeling_service(project,
data_row_and_global_key):
"""Project with a batch having multiple datarows
Project contains an ontology with 1 bbox tool
Additionally includes a create_label method for any needed extra labels
"""
global_keys = [data_row_and_global_key[1]]

batch_name = f'batch {uuid.uuid4()}'
project.create_batch(batch_name, global_keys=global_keys)

_setup_ontology(project)

yield project


# NOTE this is a nice heuristic; there is also the _wait_until_data_rows_are_processed logic in Project
# in case we still have flakiness in the future, we can use it
@pytest.fixture
69 changes: 68 additions & 1 deletion libs/labelbox/tests/integration/test_labeling_service.py
@@ -1,6 +1,6 @@
import pytest

from labelbox.exceptions import ResourceNotFoundError
from labelbox.exceptions import LabelboxError, ResourceNotFoundError
from labelbox.schema.labeling_service import LabelingServiceStatus


@@ -23,3 +23,70 @@ def test_start_labeling_service(project):

labeling_service_status = project.get_labeling_service_status()
assert labeling_service_status == LabelingServiceStatus.SetUp


def test_request_labeling_service(
configured_batch_project_for_labeling_service):
project = configured_batch_project_for_labeling_service

project.upsert_instructions('tests/integration/media/sample_pdf.pdf')

labeling_service = project.request_labeling_service(
) # project fixture is an Image type project
labeling_service.request()
assert project.get_labeling_service_status(
) == LabelingServiceStatus.Requested


def test_request_labeling_service_moe_offline_project(
rand_gen, offline_chat_evaluation_project, chat_evaluation_ontology,
offline_conversational_data_row, model_config):
project = offline_chat_evaluation_project
project.connect_ontology(chat_evaluation_ontology)

project.create_batch(
rand_gen(str),
[offline_conversational_data_row.uid], # sample of data row objects
)

project.upsert_instructions('tests/integration/media/sample_pdf.pdf')

labeling_service = project.request_labeling_service()
labeling_service.request()
assert project.get_labeling_service_status(
) == LabelingServiceStatus.Requested


def test_request_labeling_service_moe_project(
rand_gen, live_chat_evaluation_project_with_new_dataset,
chat_evaluation_ontology, model_config):
project = live_chat_evaluation_project_with_new_dataset
project.connect_ontology(chat_evaluation_ontology)

project.upsert_instructions('tests/integration/media/sample_pdf.pdf')

labeling_service = project.request_labeling_service()
with pytest.raises(
LabelboxError,
match=
'[{"errorType":"PROJECT_MODEL_CONFIG","errorMessage":"Project model config is not completed"}]'
):
labeling_service.request()
project.add_model_config(model_config.uid)
project.set_project_model_setup_complete()

labeling_service.request()
assert project.get_labeling_service_status(
) == LabelingServiceStatus.Requested


def test_request_labeling_service_incomplete_requirements(project, ontology):
labeling_service = project.request_labeling_service(
) # project fixture is an Image type project
with pytest.raises(ResourceNotFoundError,
match="Associated ontology id could not be found"
): # No labeling service by default
labeling_service.request()
project.connect_ontology(ontology)
with pytest.raises(LabelboxError):
labeling_service.request()