From ff31b93a493c250358f1139fca311f472a190932 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 18 Oct 2024 16:27:23 -0700 Subject: [PATCH 1/6] Add step by step reasoning ontology tool --- libs/labelbox/src/labelbox/schema/ontology.py | 29 ++++- .../labelbox/schema/tool_building/__init__.py | 2 + .../tool_building/step_reasoning_tool.py | 117 ++++++++++++++++++ .../schema/tool_building/tool_type.py | 5 + libs/labelbox/tests/integration/conftest.py | 4 + .../test_chat_evaluation_ontology_project.py | 2 +- .../unit/test_unit_step_reasoning_tool.py | 39 ++++++ 7 files changed, 192 insertions(+), 6 deletions(-) create mode 100644 libs/labelbox/src/labelbox/schema/tool_building/__init__.py create mode 100644 libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py create mode 100644 libs/labelbox/src/labelbox/schema/tool_building/tool_type.py create mode 100644 libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py diff --git a/libs/labelbox/src/labelbox/schema/ontology.py b/libs/labelbox/src/labelbox/schema/ontology.py index a3b388ef2..22bf822b0 100644 --- a/libs/labelbox/src/labelbox/schema/ontology.py +++ b/libs/labelbox/src/labelbox/schema/ontology.py @@ -12,6 +12,8 @@ from labelbox.orm.db_object import DbObject from labelbox.orm.model import Field, Relationship +from labelbox.schema.tool_building.step_reasoning_tool import StepReasoningTool +from labelbox.schema.tool_building.tool_type import ToolType FeatureSchemaId: Type[str] = Annotated[ str, StringConstraints(min_length=25, max_length=25) @@ -187,7 +189,7 @@ def __post_init__(self): @classmethod def from_dict(cls, dictionary: Dict[str, Any]) -> Dict[str, Any]: return cls( - class_type=cls.Type(dictionary["type"]), + class_type=Classification.Type(dictionary["type"]), name=dictionary["name"], instructions=dictionary["instructions"], required=dictionary.get("required", False), @@ -351,7 +353,7 @@ class Type(Enum): @classmethod def from_dict(cls, dictionary: Dict[str, Any]) -> Dict[str, Any]: 
return cls( - class_type=cls.Type(dictionary["type"]), + class_type=Type(dictionary["type"]), name=dictionary["name"], instructions=dictionary["instructions"], required=True, # always required @@ -458,7 +460,7 @@ def from_dict(cls, dictionary: Dict[str, Any]) -> Dict[str, Any]: schema_id=dictionary.get("schemaNodeId", None), feature_schema_id=dictionary.get("featureSchemaId", None), required=dictionary.get("required", False), - tool=cls.Type(dictionary["tool"]), + tool=Tool.Type(dictionary["tool"]), classifications=[ Classification.from_dict(c) for c in dictionary["classifications"] @@ -488,6 +490,16 @@ def add_classification(self, classification: Classification) -> None: self.classifications.append(classification) +def tool_cls_from_type(tool_type: str): + if tool_type.lower() == ToolType.STEP_REASONING.value: + from labelbox.schema.tool_building.step_reasoning_tool import ( + StepReasoningTool, + ) + + return StepReasoningTool + return Tool + + class Ontology(DbObject): """An ontology specifies which tools and classifications are available to a project. This is read only for now. 
@@ -521,11 +533,18 @@ def __init__(self, *args, **kwargs) -> None: Union[List[Classification], List[PromptResponseClassification]] ] = None + def _tool_deserializer_cls(self, tool: Dict[str, Any]) -> Tool: + import pdb + + pdb.set_trace() + return Tool + def tools(self) -> List[Tool]: """Get list of tools (AKA objects) in an Ontology.""" if self._tools is None: self._tools = [ - Tool.from_dict(tool) for tool in self.normalized["tools"] + tool_cls_from_type(tool["tool"]).from_dict(tool) + for tool in self.normalized["tools"] ] return self._tools @@ -581,7 +600,7 @@ class OntologyBuilder: """ - tools: List[Tool] = field(default_factory=list) + tools: List[Union[Tool, StepReasoningTool]] = field(default_factory=list) classifications: List[ Union[Classification, PromptResponseClassification] ] = field(default_factory=list) diff --git a/libs/labelbox/src/labelbox/schema/tool_building/__init__.py b/libs/labelbox/src/labelbox/schema/tool_building/__init__.py new file mode 100644 index 000000000..5ebc09578 --- /dev/null +++ b/libs/labelbox/src/labelbox/schema/tool_building/__init__.py @@ -0,0 +1,2 @@ +import labelbox.schema.tool_building.tool_type +import labelbox.schema.tool_building.step_reasoning_tool \ No newline at end of file diff --git a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py new file mode 100644 index 000000000..0eb176fc4 --- /dev/null +++ b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py @@ -0,0 +1,117 @@ +from dataclasses import dataclass, field +from typing import Any, Dict, Optional + +from labelbox.schema.tool_building.tool_type import ToolType + + +@dataclass +class StepReasoningVariant: + id: int + name: str + + def asdict(self) -> Dict[str, Any]: + return {"id": self.id, "name": self.name} + + +@dataclass +class IncorrectStepReasoningVariant: + id: int + name: str + regenerate_conversations_after_incorrect_step: Optional[bool] = 
False + rate_alternative_responses: Optional[bool] = False + + def asdict(self) -> Dict[str, Any]: + actions = [] + if self.regenerate_conversations_after_incorrect_step: + actions.append("regenerateSteps") + if self.rate_alternative_responses: + actions.append("generateAlternatives") + return {"id": self.id, "name": self.name, "actions": actions} + + +@dataclass +class StepReasoningVariants: + CORRECT_STEP_ID = 0 + NEUTRAL_STEP_ID = 1 + INCORRECT_STEP_ID = 2 + + correct_step: StepReasoningVariant = field( + default=StepReasoningVariant(CORRECT_STEP_ID, "Correct"), init=False + ) + neutral_step: StepReasoningVariant = field( + default=StepReasoningVariant(NEUTRAL_STEP_ID, "Neutral"), init=False + ) + incorrect_step: IncorrectStepReasoningVariant = field( + default=IncorrectStepReasoningVariant(INCORRECT_STEP_ID, "Incorrect"), + ) + + def asdict(self): + return [ + self.correct_step.asdict(), + self.neutral_step.asdict(), + self.incorrect_step.asdict(), + ] + + +@dataclass +class StepReasoningDefinition: + variants: StepReasoningVariants = field( + default_factory=StepReasoningVariants + ) + version: int = field(default=1) + title: Optional[str] = None + value: Optional[str] = None + color: Optional[str] = None + + def asdict(self) -> Dict[str, Any]: + result = {"variants": self.variants.asdict(), "version": self.version} + if self.title is not None: + result["title"] = self.title + if self.value is not None: + result["value"] = self.value + if self.color is not None: + result["color"] = self.color + return result + + +@dataclass +class StepReasoningTool: + name: str + type: ToolType = field(default=ToolType.STEP_REASONING, init=False) + required: bool = False + schema_id: Optional[str] = None + feature_schema_id: Optional[str] = None + color: Optional[str] = None + definition: StepReasoningDefinition = field( + default_factory=StepReasoningDefinition + ) + + def set_regenerate_conversations_after_incorrect_step(self): + 
self.definition.variants.incorrect_step.regenerate_conversations_after_incorrect_step = True + + def set_rate_alternative_responses(self): + self.definition.variants.incorrect_step.rate_alternative_responses = ( + True + ) + + def asdict(self) -> Dict[str, Any]: + self.set_rate_alternative_responses() + self.set_regenerate_conversations_after_incorrect_step() + return { + "tool": self.type.value, + "name": self.name, + "required": self.required, + "schemaNodeId": self.schema_id, + "featureSchemaId": self.feature_schema_id, + "definition": self.definition.asdict(), + } + + @classmethod + def from_dict(cls, dictionary: Dict[str, Any]) -> "StepReasoningTool": + return cls( + name=dictionary["name"], + schema_id=dictionary.get("schemaNodeId", None), + feature_schema_id=dictionary.get("featureSchemaId", None), + required=dictionary.get("required", False), + definition=StepReasoningDefinition(**dictionary["definition"]), + ) diff --git a/libs/labelbox/src/labelbox/schema/tool_building/tool_type.py b/libs/labelbox/src/labelbox/schema/tool_building/tool_type.py new file mode 100644 index 000000000..bbe8f231f --- /dev/null +++ b/libs/labelbox/src/labelbox/schema/tool_building/tool_type.py @@ -0,0 +1,5 @@ +from enum import Enum + + +class ToolType(Enum): + STEP_REASONING = "step-reasoning" diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index c248bf67e..5d80509e1 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -21,6 +21,8 @@ ) from labelbox.schema.data_row import DataRowMetadataField from labelbox.schema.ontology_kind import OntologyKind +from labelbox.schema.tool_building.step_reasoning_tool import StepReasoningTool +from labelbox.schema.tool_building.tool_type import ToolType from labelbox.schema.user import User @@ -562,6 +564,7 @@ def feature_schema(client, point): @pytest.fixture def chat_evaluation_ontology(client, rand_gen): ontology_name = 
f"test-chat-evaluation-ontology-{rand_gen(str)}" + ontology_builder = OntologyBuilder( tools=[ Tool( @@ -576,6 +579,7 @@ def chat_evaluation_ontology(client, rand_gen): tool=Tool.Type.MESSAGE_RANKING, name="model output multi ranking", ), + StepReasoningTool(name="step reasoning"), ], classifications=[ Classification( diff --git a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py index 2c02b77ac..4a633be9b 100644 --- a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py +++ b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py @@ -15,7 +15,7 @@ def test_create_chat_evaluation_ontology_project( # here we are essentially testing the ontology creation which is a fixture assert ontology assert ontology.name - assert len(ontology.tools()) == 3 + assert len(ontology.tools()) == 4 for tool in ontology.tools(): assert tool.schema_id assert tool.feature_schema_id diff --git a/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py b/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py new file mode 100644 index 000000000..f364ab55c --- /dev/null +++ b/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py @@ -0,0 +1,39 @@ +from labelbox.schema.tool_building.step_reasoning_tool import StepReasoningTool + + +def test_step_reasoning_as_dict_default(): + tool = StepReasoningTool(name="step reasoning") + assert tool.asdict() == { + "tool": "step-reasoning", + "name": "step reasoning", + "required": False, + "schemaNodeId": None, + "featureSchemaId": None, + "variants": [ + {"id": 0, "name": "Correct"}, + {"id": 1, "name": "Neutral"}, + {"id": 2, "name": "Incorrect", "actions": []}, + ], + } + + +def test_step_reasoning_as_dict_with_actions(): + tool = StepReasoningTool(name="step reasoning") + tool.set_rate_alternative_responses() + tool.set_regenerate_conversations_after_incorrect_step() + assert tool.asdict() == { + "tool": 
"step-reasoning", + "name": "step reasoning", + "required": False, + "schemaNodeId": None, + "featureSchemaId": None, + "variants": [ + {"id": 0, "name": "Correct"}, + {"id": 1, "name": "Neutral"}, + { + "id": 2, + "name": "Incorrect", + "actions": ["regenerateSteps", "generateAlternatives"], + }, + ], + } From b29d1c058bd73a007ab8725c6dd1eef237e93039 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 22 Oct 2024 15:51:46 -0700 Subject: [PATCH 2/6] Update to support create_ontology_from_feature_schemas --- libs/labelbox/src/labelbox/client.py | 5 +- libs/labelbox/src/labelbox/schema/ontology.py | 18 ++--- .../labelbox/schema/tool_building/__init__.py | 2 +- .../tool_building/step_reasoning_tool.py | 80 ++++++++++++++++--- libs/labelbox/tests/integration/conftest.py | 2 - .../test_chat_evaluation_ontology_project.py | 25 +++++- .../test_offline_chat_evaluation_project.py | 1 - .../tests/integration/test_ontology.py | 45 +++++++++++ .../unit/test_unit_step_reasoning_tool.py | 44 ++++++---- 9 files changed, 181 insertions(+), 41 deletions(-) diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index 52a044a3b..d4376d9b4 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -56,7 +56,7 @@ FeatureSchema, Ontology, PromptResponseClassification, - Tool, + tool_type_cls_from_type, ) from labelbox.schema.ontology_kind import ( EditorTaskType, @@ -1098,7 +1098,8 @@ def create_ontology_from_feature_schemas( if "tool" in feature_schema.normalized: tool = feature_schema.normalized["tool"] try: - Tool.Type(tool) + tool_type_cls = tool_type_cls_from_type(tool) + tool_type_cls(tool) tools.append(feature_schema.normalized) except ValueError: raise ValueError( diff --git a/libs/labelbox/src/labelbox/schema/ontology.py b/libs/labelbox/src/labelbox/schema/ontology.py index 22bf822b0..3acd9e1e2 100644 --- a/libs/labelbox/src/labelbox/schema/ontology.py +++ b/libs/labelbox/src/labelbox/schema/ontology.py @@ 
-353,7 +353,7 @@ class Type(Enum): @classmethod def from_dict(cls, dictionary: Dict[str, Any]) -> Dict[str, Any]: return cls( - class_type=Type(dictionary["type"]), + class_type=PromptResponseClassification.Type(dictionary["type"]), name=dictionary["name"], instructions=dictionary["instructions"], required=True, # always required @@ -492,14 +492,16 @@ def add_classification(self, classification: Classification) -> None: def tool_cls_from_type(tool_type: str): if tool_type.lower() == ToolType.STEP_REASONING.value: - from labelbox.schema.tool_building.step_reasoning_tool import ( - StepReasoningTool, - ) - return StepReasoningTool return Tool +def tool_type_cls_from_type(tool_type: str): + if tool_type.lower() == ToolType.STEP_REASONING.value: + return ToolType + return Tool.Type + + class Ontology(DbObject): """An ontology specifies which tools and classifications are available to a project. This is read only for now. @@ -533,12 +535,6 @@ def __init__(self, *args, **kwargs) -> None: Union[List[Classification], List[PromptResponseClassification]] ] = None - def _tool_deserializer_cls(self, tool: Dict[str, Any]) -> Tool: - import pdb - - pdb.set_trace() - return Tool - def tools(self) -> List[Tool]: """Get list of tools (AKA objects) in an Ontology.""" if self._tools is None: diff --git a/libs/labelbox/src/labelbox/schema/tool_building/__init__.py b/libs/labelbox/src/labelbox/schema/tool_building/__init__.py index 5ebc09578..45098ef84 100644 --- a/libs/labelbox/src/labelbox/schema/tool_building/__init__.py +++ b/libs/labelbox/src/labelbox/schema/tool_building/__init__.py @@ -1,2 +1,2 @@ import labelbox.schema.tool_building.tool_type -import labelbox.schema.tool_building.step_reasoning_tool \ No newline at end of file +import labelbox.schema.tool_building.step_reasoning_tool diff --git a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py index 0eb176fc4..280bd3ef7 100644 --- 
a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py +++ b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from labelbox.schema.tool_building.tool_type import ToolType @@ -25,9 +25,40 @@ def asdict(self) -> Dict[str, Any]: if self.regenerate_conversations_after_incorrect_step: actions.append("regenerateSteps") if self.rate_alternative_responses: - actions.append("generateAlternatives") + actions.append("generateAndRateAlternativeSteps") return {"id": self.id, "name": self.name, "actions": actions} + @classmethod + def from_dict( + cls, dictionary: Dict[str, Any] + ) -> "IncorrectStepReasoningVariant": + return cls( + id=dictionary["id"], + name=dictionary["name"], + regenerate_conversations_after_incorrect_step="regenerateSteps" + in dictionary.get("actions", []), + rate_alternative_responses="generateAndRateAlternativeSteps" + in dictionary.get("actions", []), + ) + + +def _create_correct_step() -> StepReasoningVariant: + return StepReasoningVariant( + id=StepReasoningVariants.CORRECT_STEP_ID, name="Correct" + ) + + +def _create_neutral_step() -> StepReasoningVariant: + return StepReasoningVariant( + id=StepReasoningVariants.NEUTRAL_STEP_ID, name="Neutral" + ) + + +def _create_incorrect_step() -> IncorrectStepReasoningVariant: + return IncorrectStepReasoningVariant( + id=StepReasoningVariants.INCORRECT_STEP_ID, name="Incorrect" + ) + @dataclass class StepReasoningVariants: @@ -36,13 +67,13 @@ class StepReasoningVariants: INCORRECT_STEP_ID = 2 correct_step: StepReasoningVariant = field( - default=StepReasoningVariant(CORRECT_STEP_ID, "Correct"), init=False + default_factory=_create_correct_step ) neutral_step: StepReasoningVariant = field( - default=StepReasoningVariant(NEUTRAL_STEP_ID, "Neutral"), init=False + default_factory=_create_neutral_step ) incorrect_step: 
IncorrectStepReasoningVariant = field( - default=IncorrectStepReasoningVariant(INCORRECT_STEP_ID, "Incorrect"), + default_factory=_create_incorrect_step ) def asdict(self): @@ -52,6 +83,31 @@ def asdict(self): self.incorrect_step.asdict(), ] + @classmethod + def from_dict(cls, dictionary: List[Dict[str, Any]]): + correct_step = None + neutral_step = None + incorrect_step = None + + for variant in dictionary: + if variant["id"] == cls.CORRECT_STEP_ID: + correct_step = StepReasoningVariant(**variant) + elif variant["id"] == cls.NEUTRAL_STEP_ID: + neutral_step = StepReasoningVariant(**variant) + elif variant["id"] == cls.INCORRECT_STEP_ID: + incorrect_step = IncorrectStepReasoningVariant.from_dict( + variant + ) + + if not all([correct_step, neutral_step, incorrect_step]): + raise ValueError("Invalid step reasoning variants") + + return cls( + correct_step=correct_step, # type: ignore + neutral_step=neutral_step, # type: ignore + incorrect_step=incorrect_step, # type: ignore + ) + @dataclass class StepReasoningDefinition: @@ -61,7 +117,6 @@ class StepReasoningDefinition: version: int = field(default=1) title: Optional[str] = None value: Optional[str] = None - color: Optional[str] = None def asdict(self) -> Dict[str, Any]: result = {"variants": self.variants.asdict(), "version": self.version} @@ -69,10 +124,15 @@ def asdict(self) -> Dict[str, Any]: result["title"] = self.title if self.value is not None: result["value"] = self.value - if self.color is not None: - result["color"] = self.color return result + @classmethod + def from_dict(cls, dictionary: Dict[str, Any]) -> "StepReasoningDefinition": + variants = StepReasoningVariants.from_dict(dictionary["variants"]) + title = dictionary.get("title", None) + value = dictionary.get("value", None) + return cls(variants=variants, title=title, value=value) + @dataclass class StepReasoningTool: @@ -113,5 +173,7 @@ def from_dict(cls, dictionary: Dict[str, Any]) -> "StepReasoningTool": schema_id=dictionary.get("schemaNodeId", 
None), feature_schema_id=dictionary.get("featureSchemaId", None), required=dictionary.get("required", False), - definition=StepReasoningDefinition(**dictionary["definition"]), + definition=StepReasoningDefinition.from_dict( + dictionary["definition"] + ), ) diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index 5d80509e1..4bf8d3ae4 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -630,14 +630,12 @@ def chat_evaluation_ontology(client, rand_gen): ), ], ) - ontology = client.create_ontology( ontology_name, ontology_builder.asdict(), media_type=MediaType.Conversational, ontology_kind=OntologyKind.ModelEvaluation, ) - yield ontology try: diff --git a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py index 4a633be9b..bde58808b 100644 --- a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py +++ b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py @@ -41,7 +41,7 @@ def test_create_chat_evaluation_ontology_project( def test_create_chat_evaluation_ontology_project_existing_dataset( - client, chat_evaluation_ontology, chat_evaluation_project_append_to_dataset + chat_evaluation_ontology, chat_evaluation_project_append_to_dataset ): ontology = chat_evaluation_ontology @@ -83,6 +83,29 @@ def tools_json(): "schemaNodeId": None, "featureSchemaId": None, }, + { + "tool": "step-reasoning", + "name": "step reasoning", + "required": True, + "schemaNodeId": None, + "featureSchemaId": None, + "color": "#0000ff", + "definition": { + "variants": [ + {"id": 0, "name": "Correct"}, + {"id": 1, "name": "Neutral"}, + { + "id": 2, + "name": "Incorrect", + "actions": [ + "regenerateSteps", + "generateAndRateAlternativeSteps", + ], + }, + ], + "version": 1, + }, + }, ] return tools diff --git 
a/libs/labelbox/tests/integration/test_offline_chat_evaluation_project.py b/libs/labelbox/tests/integration/test_offline_chat_evaluation_project.py index bb1756afb..8cc8ebcb3 100644 --- a/libs/labelbox/tests/integration/test_offline_chat_evaluation_project.py +++ b/libs/labelbox/tests/integration/test_offline_chat_evaluation_project.py @@ -2,7 +2,6 @@ def test_create_offline_chat_evaluation_project( - client, rand_gen, offline_chat_evaluation_project, chat_evaluation_ontology, diff --git a/libs/labelbox/tests/integration/test_ontology.py b/libs/labelbox/tests/integration/test_ontology.py index c7c7c270c..84cd2b853 100644 --- a/libs/labelbox/tests/integration/test_ontology.py +++ b/libs/labelbox/tests/integration/test_ontology.py @@ -5,6 +5,7 @@ from labelbox import MediaType, OntologyBuilder, Tool from labelbox.orm.model import Entity +from labelbox.schema.tool_building.step_reasoning_tool import StepReasoningTool def test_feature_schema_is_not_archived(client, ontology): @@ -322,3 +323,47 @@ def test_unarchive_feature_schema_node_for_non_existing_ontology( client.unarchive_feature_schema_node( "invalid-ontology", feature_schema_to_unarchive["featureSchemaId"] ) + + +def test_step_reasoning_ontology(chat_evaluation_ontology): + ontology = chat_evaluation_ontology + step_reasoning_tool = None + for tool in ontology.normalized["tools"]: + if tool["tool"] == "step-reasoning": + step_reasoning_tool = tool + break + assert step_reasoning_tool is not None + assert step_reasoning_tool["definition"]["variants"] == [ + {"id": 0, "name": "Correct"}, + {"id": 1, "name": "Neutral"}, + { + "id": 2, + "name": "Incorrect", + "actions": ["regenerateSteps", "generateAndRateAlternativeSteps"], + }, + ] + assert step_reasoning_tool["definition"]["version"] == 1 + assert step_reasoning_tool["schemaNodeId"] is not None + assert step_reasoning_tool["featureSchemaId"] is not None + + step_reasoning_tool = None + for tool in ontology.tools(): + if isinstance(tool, StepReasoningTool): + 
step_reasoning_tool = tool + break + assert step_reasoning_tool is not None + assert step_reasoning_tool.definition.variants.asdict() == [ + { + "id": 0, + "name": "Correct", + }, + { + "id": 1, + "name": "Neutral", + }, + { + "id": 2, + "name": "Incorrect", + "actions": ["regenerateSteps", "generateAndRateAlternativeSteps"], + }, + ] diff --git a/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py b/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py index f364ab55c..cfb65eac9 100644 --- a/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py +++ b/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py @@ -9,11 +9,21 @@ def test_step_reasoning_as_dict_default(): "required": False, "schemaNodeId": None, "featureSchemaId": None, - "variants": [ - {"id": 0, "name": "Correct"}, - {"id": 1, "name": "Neutral"}, - {"id": 2, "name": "Incorrect", "actions": []}, - ], + "definition": { + "variants": [ + {"id": 0, "name": "Correct"}, + {"id": 1, "name": "Neutral"}, + { + "id": 2, + "name": "Incorrect", + "actions": [ + "regenerateSteps", + "generateAndRateAlternativeSteps", + ], + }, + ], + "version": 1, + }, } @@ -27,13 +37,19 @@ def test_step_reasoning_as_dict_with_actions(): "required": False, "schemaNodeId": None, "featureSchemaId": None, - "variants": [ - {"id": 0, "name": "Correct"}, - {"id": 1, "name": "Neutral"}, - { - "id": 2, - "name": "Incorrect", - "actions": ["regenerateSteps", "generateAlternatives"], - }, - ], + "definition": { + "variants": [ + {"id": 0, "name": "Correct"}, + {"id": 1, "name": "Neutral"}, + { + "id": 2, + "name": "Incorrect", + "actions": [ + "regenerateSteps", + "generateAndRateAlternativeSteps", + ], + }, + ], + "version": 1, + }, } From 990ddb7ebb01bdd1025eab9bdaacebf7bb177dfa Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 23 Oct 2024 11:53:38 -0700 Subject: [PATCH 3/6] Add readdoc for the StepReasoningTool --- docs/labelbox/index.rst | 1 + docs/labelbox/step_reasoning_tool.rst | 6 ++++++ 
.../schema/tool_building/step_reasoning_tool.py | 16 ++++++++++++++++ 3 files changed, 23 insertions(+) create mode 100644 docs/labelbox/step_reasoning_tool.rst diff --git a/docs/labelbox/index.rst b/docs/labelbox/index.rst index fa694119b..15ff9a0a9 100644 --- a/docs/labelbox/index.rst +++ b/docs/labelbox/index.rst @@ -46,6 +46,7 @@ Labelbox Python SDK Documentation search-filters send-to-annotate-params slice + step_reasoning_tool task task-queue user diff --git a/docs/labelbox/step_reasoning_tool.rst b/docs/labelbox/step_reasoning_tool.rst new file mode 100644 index 000000000..b363589e8 --- /dev/null +++ b/docs/labelbox/step_reasoning_tool.rst @@ -0,0 +1,6 @@ +Step Reasoning Tool +=============================================================================================== + +.. automodule:: labelbox.schema.tool_building.step_reasoning_tool + :members: + :show-inheritance: \ No newline at end of file diff --git a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py index 280bd3ef7..11c54ddac 100644 --- a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py +++ b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py @@ -62,6 +62,11 @@ def _create_incorrect_step() -> IncorrectStepReasoningVariant: @dataclass class StepReasoningVariants: + """ + This class is used to define the possible options for evaluating a step + Currently the options are correct, neutral, and incorrect + """ + CORRECT_STEP_ID = 0 NEUTRAL_STEP_ID = 1 INCORRECT_STEP_ID = 2 @@ -136,6 +141,11 @@ def from_dict(cls, dictionary: Dict[str, Any]) -> "StepReasoningDefinition": @dataclass class StepReasoningTool: + """ + Use this class in OntologyBuilder to create a tool for step reasoning + The definition field lists the possible options to evaluate a step + """ + name: str type: ToolType = field(default=ToolType.STEP_REASONING, init=False) required: bool = False @@ -147,9
+157,15 @@ class StepReasoningTool: ) def set_regenerate_conversations_after_incorrect_step(self): + """ + For live models, will invoke the model to generate alternatives if a step is marked as incorrect + """ self.definition.variants.incorrect_step.regenerate_conversations_after_incorrect_step = True def set_rate_alternative_responses(self): + """ + For live models, will require labelers to rate the alternatives generated by the model + """ self.definition.variants.incorrect_step.rate_alternative_responses = ( True ) From c5e133d8eafb12bde72f4b43402b8e46786d5cde Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 25 Oct 2024 10:38:22 -0700 Subject: [PATCH 4/6] Make set_regenerate_conversations_after_incorrect_step and set_rate_alternative_responses configurable by the user --- .../src/labelbox/schema/tool_building/step_reasoning_tool.py | 2 -- libs/labelbox/tests/integration/test_ontology.py | 4 ++-- libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py | 5 +---- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py index 11c54ddac..dd78d45bd 100644 --- a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py +++ b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py @@ -171,8 +171,6 @@ def set_rate_alternative_responses(self): ) def asdict(self) -> Dict[str, Any]: - self.set_rate_alternative_responses() - self.set_regenerate_conversations_after_incorrect_step() return { "tool": self.type.value, "name": self.name, diff --git a/libs/labelbox/tests/integration/test_ontology.py b/libs/labelbox/tests/integration/test_ontology.py index 84cd2b853..76d021bdd 100644 --- a/libs/labelbox/tests/integration/test_ontology.py +++ b/libs/labelbox/tests/integration/test_ontology.py @@ -339,7 +339,7 @@ def test_step_reasoning_ontology(chat_evaluation_ontology): { "id": 2, "name": "Incorrect", 
- "actions": ["regenerateSteps", "generateAndRateAlternativeSteps"], + "actions": [], }, ] assert step_reasoning_tool["definition"]["version"] == 1 @@ -364,6 +364,6 @@ def test_step_reasoning_ontology(chat_evaluation_ontology): { "id": 2, "name": "Incorrect", - "actions": ["regenerateSteps", "generateAndRateAlternativeSteps"], + "actions": [], }, ] diff --git a/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py b/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py index cfb65eac9..0d8bd9a53 100644 --- a/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py +++ b/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py @@ -16,10 +16,7 @@ def test_step_reasoning_as_dict_default(): { "id": 2, "name": "Incorrect", - "actions": [ - "regenerateSteps", - "generateAndRateAlternativeSteps", - ], + "actions": [], }, ], "version": 1, From fa29d2b1eb48ceadd9c17f923739c8a76e37cbf0 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 28 Oct 2024 14:26:41 -0700 Subject: [PATCH 5/6] Adjust default actions to match UI --- .../labelbox/schema/tool_building/step_reasoning_tool.py | 9 +++++---- .../labelbox/tests/unit/test_unit_step_reasoning_tool.py | 5 ++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py index dd78d45bd..f3693a003 100644 --- a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py +++ b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py @@ -17,7 +17,7 @@ def asdict(self) -> Dict[str, Any]: class IncorrectStepReasoningVariant: id: int name: str - regenerate_conversations_after_incorrect_step: Optional[bool] = False + regenerate_conversations_after_incorrect_step: Optional[bool] = True rate_alternative_responses: Optional[bool] = False def asdict(self) -> Dict[str, Any]: @@ -156,11 +156,12 @@ class StepReasoningTool: default_factory=StepReasoningDefinition ) 
- def set_regenerate_conversations_after_incorrect_step(self): + def reset_regenerate_conversations_after_incorrect_step(self): """ - For live models, will invoke the model to generate alternatives if a step is marked as incorrect + For live models, the default acation will invoke the model to generate alternatives if a step is marked as incorrect + This method will reset the action to not regenerate the conversation """ - self.definition.variants.incorrect_step.regenerate_conversations_after_incorrect_step = True + self.definition.variants.incorrect_step.regenerate_conversations_after_incorrect_step = False def set_rate_alternative_responses(self): """ diff --git a/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py b/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py index 0d8bd9a53..4d6986b86 100644 --- a/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py +++ b/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py @@ -16,7 +16,7 @@ def test_step_reasoning_as_dict_default(): { "id": 2, "name": "Incorrect", - "actions": [], + "actions": ["regenerateSteps"], }, ], "version": 1, @@ -27,7 +27,7 @@ def test_step_reasoning_as_dict_default(): def test_step_reasoning_as_dict_with_actions(): tool = StepReasoningTool(name="step reasoning") tool.set_rate_alternative_responses() - tool.set_regenerate_conversations_after_incorrect_step() + tool.reset_regenerate_conversations_after_incorrect_step() assert tool.asdict() == { "tool": "step-reasoning", "name": "step reasoning", @@ -42,7 +42,6 @@ def test_step_reasoning_as_dict_with_actions(): "id": 2, "name": "Incorrect", "actions": [ - "regenerateSteps", "generateAndRateAlternativeSteps", ], }, From b8f412b190d3698ea72508a7e23e19987bb46f50 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 28 Oct 2024 15:02:28 -0700 Subject: [PATCH 6/6] Fix tests --- libs/labelbox/mypy.ini | 2 +- .../labelbox/schema/tool_building/step_reasoning_tool.py | 6 ++++++ libs/labelbox/tests/integration/conftest.py | 1 - 
libs/labelbox/tests/integration/test_ontology.py | 4 ++-- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/libs/labelbox/mypy.ini b/libs/labelbox/mypy.ini index a9b715cf9..b09c45d33 100644 --- a/libs/labelbox/mypy.ini +++ b/libs/labelbox/mypy.ini @@ -12,5 +12,5 @@ ignore_errors = True [mypy-lbox.exceptions] ignore_missing_imports = True -[mypy-lbox.call_info"] +[mypy-lbox.call_info] ignore_missing_imports = True diff --git a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py index f3693a003..7b0536cec 100644 --- a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py +++ b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py @@ -1,3 +1,4 @@ +import warnings from dataclasses import dataclass, field from typing import Any, Dict, List, Optional @@ -156,6 +157,11 @@ class StepReasoningTool: default_factory=StepReasoningDefinition ) + def __post_init__(self): + warnings.warn( + "This feature is experimental and subject to change.", + ) + def reset_regenerate_conversations_after_incorrect_step(self): """ For live models, the default acation will invoke the model to generate alternatives if a step is marked as incorrect diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index 4bf8d3ae4..8e138f4a1 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -22,7 +22,6 @@ from labelbox.schema.data_row import DataRowMetadataField from labelbox.schema.ontology_kind import OntologyKind from labelbox.schema.tool_building.step_reasoning_tool import StepReasoningTool -from labelbox.schema.tool_building.tool_type import ToolType from labelbox.schema.user import User diff --git a/libs/labelbox/tests/integration/test_ontology.py b/libs/labelbox/tests/integration/test_ontology.py index 76d021bdd..acb4e7bb1 100644 --- 
a/libs/labelbox/tests/integration/test_ontology.py +++ b/libs/labelbox/tests/integration/test_ontology.py @@ -339,7 +339,7 @@ def test_step_reasoning_ontology(chat_evaluation_ontology): { "id": 2, "name": "Incorrect", - "actions": [], + "actions": ["regenerateSteps"], }, ] assert step_reasoning_tool["definition"]["version"] == 1 @@ -364,6 +364,6 @@ def test_step_reasoning_ontology(chat_evaluation_ontology): { "id": 2, "name": "Incorrect", - "actions": [], + "actions": ["regenerateSteps"], }, ]