Skip to content

[PLT-1266] Added project and ontology creation for prompt response projects #1726

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Jul 25, 2024
2 changes: 2 additions & 0 deletions libs/labelbox/src/labelbox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from labelbox.schema.asset_attachment import AssetAttachment
from labelbox.schema.webhook import Webhook
from labelbox.schema.ontology import Ontology, OntologyBuilder, Classification, Option, Tool, FeatureSchema
from labelbox.schema.ontology import PromptResponseClassification
from labelbox.schema.ontology import ResponseOption
from labelbox.schema.role import Role, ProjectRole
from labelbox.schema.invite import Invite, InviteLimit
from labelbox.schema.data_row_metadata import DataRowMetadataOntology, DataRowMetadataField, DataRowMetadata, DeleteDataRowMetadata
Expand Down
112 changes: 98 additions & 14 deletions libs/labelbox/src/labelbox/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from labelbox.schema.model_config import ModelConfig
from labelbox.schema.model_run import ModelRun
from labelbox.schema.ontology import Ontology, DeleteFeatureFromOntologyResult
from labelbox.schema.ontology import Tool, Classification, FeatureSchema
from labelbox.schema.ontology import Tool, Classification, FeatureSchema, PromptResponseClassification
from labelbox.schema.organization import Organization
from labelbox.schema.project import Project
from labelbox.schema.quality_mode import QualityMode, BENCHMARK_AUTO_AUDIT_NUMBER_OF_LABELS, \
Expand Down Expand Up @@ -874,6 +874,92 @@ def create_offline_model_evaluation_project(self, **kwargs) -> Project:
kwargs.pop("data_row_count", None)

return self._create_project(**kwargs)


def create_prompt_response_generation_project(self,
                                              dataset_id: Optional[str] = None,
                                              dataset_name: Optional[str] = None,
                                              data_row_count: int = 100,
                                              **kwargs) -> Project:
    """
    Use this method exclusively to create a prompt and response generation project.

    Args:
        dataset_name: When creating a new dataset, pass the name
        dataset_id: When using an existing dataset, pass the id
        data_row_count: The number of data row assets to use for the project
        **kwargs: Additional parameters to pass; see the create_project method.
            ``media_type`` must be MediaType.LLMPromptCreation or
            MediaType.LLMPromptResponseCreation if supplied.
    Returns:
        Project: The created project
    Raises:
        ValueError: If neither or both of dataset_name/dataset_id are given,
            if data_row_count is not positive, or if media_type is not a
            prompt/prompt-response creation media type.

    NOTE: Only a dataset_name or dataset_id should be included

    Examples:
        >>> client.create_prompt_response_generation_project(name=project_name, dataset_name="new data set", media_type=MediaType.LLMPromptResponseCreation)
        >>> This creates a new dataset with a default number of rows (100), creates new project and assigns a batch of the newly created data rows to the project.

        >>> client.create_prompt_response_generation_project(name=project_name, dataset_name="new data set", data_row_count=10, media_type=MediaType.LLMPromptCreation)
        >>> This creates a new dataset with 10 data rows, creates new project and assigns a batch of the newly created data rows to the project.

        >>> client.create_prompt_response_generation_project(name=project_name, dataset_id="clr00u8j0j0j0", media_type=MediaType.LLMPromptCreation)
        >>> This creates a new project, and adds 100 data rows to the dataset with id "clr00u8j0j0j0" and assigns a batch of the newly created data rows to the project.

        >>> client.create_prompt_response_generation_project(name=project_name, dataset_id="clr00u8j0j0j0", data_row_count=10, media_type=MediaType.LLMPromptResponseCreation)
        >>> This creates a new project, and adds 10 data rows to the dataset with id "clr00u8j0j0j0" and assigns a batch of the newly created data rows to the project.

    """
    # Exactly one of dataset_id / dataset_name must be provided.
    if not dataset_id and not dataset_name:
        raise ValueError(
            "dataset_name or dataset_id must be present and not be an empty string."
        )

    if dataset_id and dataset_name:
        raise ValueError(
            "Only provide a dataset_name or dataset_id, not both."
        )

    if data_row_count <= 0:
        raise ValueError("data_row_count must be a positive integer.")

    if dataset_id:
        # Reuse an existing dataset: new data rows are appended to it.
        append_to_existing_dataset = True
        dataset_name_or_id = dataset_id
    else:
        # A fresh dataset will be created with the given name.
        append_to_existing_dataset = False
        dataset_name_or_id = dataset_name

    # media_type is optional here, but when given it must be one of the two
    # prompt/response creation media types.
    if "media_type" in kwargs and kwargs.get("media_type") not in [MediaType.LLMPromptCreation, MediaType.LLMPromptResponseCreation]:
        raise ValueError(
            "media_type must be either LLMPromptCreation or LLMPromptResponseCreation"
        )

    kwargs["dataset_name_or_id"] = dataset_name_or_id
    kwargs["append_to_existing_dataset"] = append_to_existing_dataset
    kwargs["data_row_count"] = data_row_count

    # editor_task_type is determined by the media type for these projects;
    # a caller-supplied value is discarded.
    kwargs.pop("editor_task_type", None)

    return self._create_project(**kwargs)

def create_response_creation_project(self, **kwargs) -> Project:
    """
    Creates a project for response creation.
    Args:
        **kwargs: Additional parameters to pass see the create_project method
    Returns:
        Project: The created project
    """
    # Response creation projects are always Text media, with a dedicated
    # editor task type.
    kwargs["media_type"] = MediaType.Text  # Only Text is supported
    kwargs["editor_task_type"] = EditorTaskType.ResponseCreation.value  # Special editor task type for response creation projects

    # Dataset-related options are not supported for response creation
    # projects; silently drop any that were passed in.
    for unsupported_param in ("dataset_name_or_id",
                              "append_to_existing_dataset",
                              "data_row_count"):
        kwargs.pop(unsupported_param, None)

    return self._create_project(**kwargs)

def _create_project(self, **kwargs) -> Project:
auto_audit_percentage = kwargs.get("auto_audit_percentage")
Expand Down Expand Up @@ -1189,11 +1275,13 @@ def create_ontology_from_feature_schemas(
name (str): Name of the ontology
feature_schema_ids (List[str]): List of feature schema ids corresponding to
top level tools and classifications to include in the ontology
media_type (MediaType or None): Media type of a new ontology. NOTE for chat evaluation, we currently foce media_type to Conversational
media_type (MediaType or None): Media type of a new ontology.
ontology_kind (OntologyKind or None): set to OntologyKind.ModelEvaluation if the ontology is for chat evaluation,
leave as None otherwise.
Returns:
The created Ontology

NOTE for chat evaluation, we currently force media_type to Conversational and for response creation, we force media_type to Text.
"""
tools, classifications = [], []
for feature_schema_id in feature_schema_ids:
Expand All @@ -1209,10 +1297,13 @@ def create_ontology_from_feature_schemas(
f"Tool `{tool}` not in list of supported tools.")
elif 'type' in feature_schema.normalized:
classification = feature_schema.normalized['type']
try:
if classification in Classification.Type._value2member_map_.keys():
Classification.Type(classification)
classifications.append(feature_schema.normalized)
except ValueError:
elif classification in PromptResponseClassification.Type._value2member_map_.keys():
PromptResponseClassification.Type(classification)
classifications.append(feature_schema.normalized)
else:
raise ValueError(
f"Classification `{classification}` not in list of supported classifications."
)
Expand All @@ -1222,15 +1313,7 @@ def create_ontology_from_feature_schemas(
)
normalized = {'tools': tools, 'classifications': classifications}

if ontology_kind and ontology_kind is OntologyKind.ModelEvaluation:
if media_type is None:
media_type = MediaType.Conversational
else:
if media_type is not MediaType.Conversational:
raise ValueError(
"For chat evaluation, media_type must be Conversational."
)

# validation for ontology_kind and media_type is done within self.create_ontology
return self.create_ontology(name=name,
normalized=normalized,
media_type=media_type,
Expand Down Expand Up @@ -1424,7 +1507,7 @@ def create_ontology(self,
Returns:
The created Ontology

NOTE caller of this method is expected to set media_type to Conversational if ontology_kind is ModelEvaluation
NOTE for chat evaluation, we currently force media_type to Conversational and for response creation, we force media_type to Text.
"""

media_type_value = None
Expand All @@ -1435,6 +1518,7 @@ def create_ontology(self,
raise get_media_type_validation_error(media_type)

if ontology_kind and OntologyKind.is_supported(ontology_kind):
media_type = OntologyKind.evaluate_ontology_kind_with_media_type(ontology_kind, media_type)
editor_task_type_value = EditorTaskTypeMapper.to_editor_task_type(
ontology_kind, media_type).value
elif ontology_kind:
Expand Down
Loading
Loading