Skip to content

Commit 535e83e

Browse files
authored
[PLT-1266] Added project and ontology creation for prompt response projects (#1726)
1 parent dbfa46a commit 535e83e

File tree

8 files changed

+634
-34
lines changed

8 files changed

+634
-34
lines changed

libs/labelbox/src/labelbox/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
from labelbox.schema.asset_attachment import AssetAttachment
2424
from labelbox.schema.webhook import Webhook
2525
from labelbox.schema.ontology import Ontology, OntologyBuilder, Classification, Option, Tool, FeatureSchema
26+
from labelbox.schema.ontology import PromptResponseClassification
27+
from labelbox.schema.ontology import ResponseOption
2628
from labelbox.schema.role import Role, ProjectRole
2729
from labelbox.schema.invite import Invite, InviteLimit
2830
from labelbox.schema.data_row_metadata import DataRowMetadataOntology, DataRowMetadataField, DataRowMetadata, DeleteDataRowMetadata

libs/labelbox/src/labelbox/client.py

Lines changed: 98 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from labelbox.schema.model_config import ModelConfig
4242
from labelbox.schema.model_run import ModelRun
4343
from labelbox.schema.ontology import Ontology, DeleteFeatureFromOntologyResult
44-
from labelbox.schema.ontology import Tool, Classification, FeatureSchema
44+
from labelbox.schema.ontology import Tool, Classification, FeatureSchema, PromptResponseClassification
4545
from labelbox.schema.organization import Organization
4646
from labelbox.schema.project import Project
4747
from labelbox.schema.quality_mode import QualityMode, BENCHMARK_AUTO_AUDIT_NUMBER_OF_LABELS, \
@@ -874,6 +874,92 @@ def create_offline_model_evaluation_project(self, **kwargs) -> Project:
874874
kwargs.pop("data_row_count", None)
875875

876876
return self._create_project(**kwargs)
877+
878+
879+
def create_prompt_response_generation_project(self,
                                              dataset_id: Optional[str] = None,
                                              dataset_name: Optional[str] = None,
                                              data_row_count: int = 100,
                                              **kwargs) -> "Project":
    """
    Use this method exclusively to create a prompt and response generation project.

    Exactly one of ``dataset_id`` and ``dataset_name`` must be supplied. The
    chosen value is forwarded to project creation as ``dataset_name_or_id``,
    together with an ``append_to_existing_dataset`` flag that is True only
    when ``dataset_id`` was given.

    Args:
        dataset_id: When using an existing dataset, pass the id
        dataset_name: When creating a new dataset, pass the name
        data_row_count: The number of data row assets to use for the project.
            Must be a positive integer.
        **kwargs: Additional parameters to pass; see the create_project method.
            If ``media_type`` is supplied it must be
            MediaType.LLMPromptCreation or MediaType.LLMPromptResponseCreation.
            Any ``editor_task_type`` entry is discarded.
    Returns:
        Project: The created project
    Raises:
        ValueError: If neither or both of dataset_id / dataset_name are given,
            if data_row_count is not positive, or if media_type is supplied
            with an unsupported value.

    NOTE: Only a dataset_name or dataset_id should be included

    Examples:
        Create a new dataset with the default number of rows (100), create a
        new project and assign a batch of the newly created data rows to it:

        >>> client.create_prompt_response_generation_project(name=project_name, dataset_name="new data set", media_type=MediaType.LLMPromptResponseCreation)

        Create a new dataset with 10 data rows, create a new project and
        assign a batch of the newly created data rows to it:

        >>> client.create_prompt_response_generation_project(name=project_name, dataset_name="new data set", data_row_count=10, media_type=MediaType.LLMPromptCreation)

        Create a new project, add 100 data rows to the dataset with id
        "clr00u8j0j0j0" and assign a batch of the newly created data rows to
        the project:

        >>> client.create_prompt_response_generation_project(name=project_name, dataset_id="clr00u8j0j0j0", media_type=MediaType.LLMPromptCreation)

        Create a new project, add 10 data rows to the dataset with id
        "clr00u8j0j0j0" and assign a batch of the newly created 10 data rows
        to the project:

        >>> client.create_prompt_response_generation_project(name=project_name, dataset_id="clr00u8j0j0j0", data_row_count=10, media_type=MediaType.LLMPromptResponseCreation)
    """
    if not dataset_id and not dataset_name:
        raise ValueError(
            "dataset_name or dataset_id must be present and not be an empty string."
        )

    if dataset_id and dataset_name:
        raise ValueError(
            "Only provide a dataset_name or dataset_id, not both."
        )

    if data_row_count <= 0:
        raise ValueError("data_row_count must be a positive integer.")

    # media_type is optional here, but when the caller does pass it, only the
    # two prompt/response media types are accepted.
    if "media_type" in kwargs and kwargs["media_type"] not in (
            MediaType.LLMPromptCreation,
            MediaType.LLMPromptResponseCreation,
    ):
        raise ValueError(
            "media_type must be either LLMPromptCreation or LLMPromptResponseCreation"
        )

    # Exactly one of the two identifiers is truthy at this point: an id means
    # "append to the existing dataset", a name means "create a new dataset".
    kwargs["dataset_name_or_id"] = dataset_id or dataset_name
    kwargs["append_to_existing_dataset"] = bool(dataset_id)
    kwargs["data_row_count"] = data_row_count

    # A caller-supplied editor_task_type is never forwarded for this project kind.
    kwargs.pop("editor_task_type", None)

    return self._create_project(**kwargs)
943+
944+
def create_response_creation_project(self, **kwargs) -> Project:
    """
    Creates a project for response creation.

    ``media_type`` is always set to MediaType.Text and ``editor_task_type``
    to the ResponseCreation editor task type, overriding any values the
    caller passed. Dataset-related arguments are removed before the project
    is created.

    Args:
        **kwargs: Additional parameters to pass; see the create_project method
    Returns:
        Project: The created project
    """
    # The following arguments are not supported for response creation projects
    for unsupported_key in ("dataset_name_or_id",
                            "append_to_existing_dataset",
                            "data_row_count"):
        kwargs.pop(unsupported_key, None)

    kwargs.update(
        media_type=MediaType.Text,  # Only Text is supported
        # Special editor task type for response creation projects
        editor_task_type=EditorTaskType.ResponseCreation.value,
    )

    return self._create_project(**kwargs)
877963

878964
def _create_project(self, **kwargs) -> Project:
879965
auto_audit_percentage = kwargs.get("auto_audit_percentage")
@@ -1189,11 +1275,13 @@ def create_ontology_from_feature_schemas(
11891275
name (str): Name of the ontology
11901276
feature_schema_ids (List[str]): List of feature schema ids corresponding to
11911277
top level tools and classifications to include in the ontology
1192-
media_type (MediaType or None): Media type of a new ontology. NOTE for chat evaluation, we currently foce media_type to Conversational
1278+
media_type (MediaType or None): Media type of a new ontology.
11931279
ontology_kind (OntologyKind or None): set to OntologyKind.ModelEvaluation if the ontology is for chat evaluation,
11941280
leave as None otherwise.
11951281
Returns:
11961282
The created Ontology
1283+
1284+
NOTE for chat evaluation, we currently force media_type to Conversational and for response creation, we force media_type to Text.
11971285
"""
11981286
tools, classifications = [], []
11991287
for feature_schema_id in feature_schema_ids:
@@ -1209,10 +1297,13 @@ def create_ontology_from_feature_schemas(
12091297
f"Tool `{tool}` not in list of supported tools.")
12101298
elif 'type' in feature_schema.normalized:
12111299
classification = feature_schema.normalized['type']
1212-
try:
1300+
if classification in Classification.Type._value2member_map_.keys():
12131301
Classification.Type(classification)
12141302
classifications.append(feature_schema.normalized)
1215-
except ValueError:
1303+
elif classification in PromptResponseClassification.Type._value2member_map_.keys():
1304+
PromptResponseClassification.Type(classification)
1305+
classifications.append(feature_schema.normalized)
1306+
else:
12161307
raise ValueError(
12171308
f"Classification `{classification}` not in list of supported classifications."
12181309
)
@@ -1222,15 +1313,7 @@ def create_ontology_from_feature_schemas(
12221313
)
12231314
normalized = {'tools': tools, 'classifications': classifications}
12241315

1225-
if ontology_kind and ontology_kind is OntologyKind.ModelEvaluation:
1226-
if media_type is None:
1227-
media_type = MediaType.Conversational
1228-
else:
1229-
if media_type is not MediaType.Conversational:
1230-
raise ValueError(
1231-
"For chat evaluation, media_type must be Conversational."
1232-
)
1233-
1316+
# validation for ontology_kind and media_type is done within self.create_ontology
12341317
return self.create_ontology(name=name,
12351318
normalized=normalized,
12361319
media_type=media_type,
@@ -1424,7 +1507,7 @@ def create_ontology(self,
14241507
Returns:
14251508
The created Ontology
14261509
1427-
NOTE caller of this method is expected to set media_type to Conversational if ontology_kind is ModelEvaluation
1510+
NOTE for chat evaluation, we currently force media_type to Conversational and for response creation, we force media_type to Text.
14281511
"""
14291512

14301513
media_type_value = None
@@ -1435,6 +1518,7 @@ def create_ontology(self,
14351518
raise get_media_type_validation_error(media_type)
14361519

14371520
if ontology_kind and OntologyKind.is_supported(ontology_kind):
1521+
media_type = OntologyKind.evaluate_ontology_kind_with_media_type(ontology_kind, media_type)
14381522
editor_task_type_value = EditorTaskTypeMapper.to_editor_task_type(
14391523
ontology_kind, media_type).value
14401524
elif ontology_kind:

0 commit comments

Comments
 (0)