Refactor StepReasoning to also reuse Variants

Val Brodsky · Val Brodsky · commit bc04ae34b26a · 2024-11-01T11:24:09.000-07:00
diff --git a/libs/labelbox/src/labelbox/schema/tool_building/fact_checking_tool.py b/libs/labelbox/src/labelbox/schema/tool_building/fact_checking_tool.py
@@ -1,5 +1,7 @@
+import warnings
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Set
+from enum import Enum
+from typing import Any, Dict, List, Optional
 
 from labelbox.schema.tool_building.tool_type import ToolType
 from labelbox.schema.tool_building.variant import (
@@ -8,6 +10,18 @@
 )
 
 
+class UnsupportedStepActions(Enum):
+    WRITE_JUSTIFICATION = "writeJustification"
+
+
+class CanConfidentlyAssessStepActions(Enum):
+    WRITE_JUSTIFICATION = "writeJustification"
+
+
+class NoFactualInformationStepActions(Enum):
+    WRITE_JUSTIFICATION = "writeJustification"
+
+
 @dataclass
 class FactCheckingVariants:
     """
@@ -26,21 +40,32 @@ class FactCheckingVariants:
     )
     unsupported_step: VariantWithActions = field(
         default_factory=lambda: VariantWithActions(
-            id=3, name="Unsupported", _available_actions={"writeJustification"}
+            id=3,
+            name="Unsupported",
+            _available_actions={
+                action.value for action in UnsupportedStepActions
+            },
+            actions=[UnsupportedStepActions.WRITE_JUSTIFICATION.value],
         )
     )
     cant_confidently_assess_step: VariantWithActions = field(
         default_factory=lambda: VariantWithActions(
             id=4,
             name="Can't confidently assess",
-            _available_actions={"writeJustification"},
+            _available_actions={
+                action.value for action in CanConfidentlyAssessStepActions
+            },
+            actions=[CanConfidentlyAssessStepActions.WRITE_JUSTIFICATION.value],
         )
     )
     no_factual_information_step: VariantWithActions = field(
         default_factory=lambda: VariantWithActions(
             id=5,
             name="No factual information",
-            _available_actions={"writeJustification"},
+            _available_actions={
+                action.value for action in NoFactualInformationStepActions
+            },
+            actions=[NoFactualInformationStepActions.WRITE_JUSTIFICATION.value],
         )
     )
 
@@ -138,23 +163,31 @@ class FactCheckingTool:
         default_factory=FactCheckingDefinition
     )
 
+    def __post_init__(self):
+        warnings.warn(
+            "This feature is experimental and subject to change.",
+        )
+
     def set_unsupported_step_actions(
-        self, actions: Set[str] = {"writeJustification"}
+        self, actions: List[UnsupportedStepActions]
     ) -> None:
-        self.definition.variants.unsupported_step.set_actions(actions)
+        actions_values = [action.value for action in actions]
+        self.definition.variants.unsupported_step.set_actions(actions_values)
 
     def set_cant_confidently_assess_step_actions(
-        self, actions: Set[str] = {"writeJustification"}
+        self, actions: List[CanConfidentlyAssessStepActions]
     ) -> None:
+        actions_values = [action.value for action in actions]
         self.definition.variants.cant_confidently_assess_step.set_actions(
-            actions
+            actions_values
         )
 
     def set_no_factual_information_step_actions(
-        self, actions: Set[str] = {"writeJustification"}
+        self, actions: List[NoFactualInformationStepActions]
     ) -> None:
+        actions_values = [action.value for action in actions]
         self.definition.variants.no_factual_information_step.set_actions(
-            actions
+            actions_values
         )
 
     def asdict(self) -> Dict[str, Any]:
diff --git a/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py b/libs/labelbox/src/labelbox/schema/tool_building/step_reasoning_tool.py
@@ -1,64 +1,15 @@
 import warnings
 from dataclasses import dataclass, field
+from enum import Enum
 from typing import Any, Dict, List, Optional
 
 from labelbox.schema.tool_building.tool_type import ToolType
+from labelbox.schema.tool_building.variant import Variant, VariantWithActions
 
 
-@dataclass
-class StepReasoningVariant:
-    id: int
-    name: str
-
-    def asdict(self) -> Dict[str, Any]:
-        return {"id": self.id, "name": self.name}
-
-
-@dataclass
-class IncorrectStepReasoningVariant:
-    id: int
-    name: str
-    regenerate_conversations_after_incorrect_step: Optional[bool] = True
-    rate_alternative_responses: Optional[bool] = True
-
-    def asdict(self) -> Dict[str, Any]:
-        actions = []
-        if self.regenerate_conversations_after_incorrect_step:
-            actions.append("regenerateSteps")
-        if self.rate_alternative_responses:
-            actions.append("generateAndRateAlternativeSteps")
-        return {"id": self.id, "name": self.name, "actions": actions}
-
-    @classmethod
-    def from_dict(
-        cls, dictionary: Dict[str, Any]
-    ) -> "IncorrectStepReasoningVariant":
-        return cls(
-            id=dictionary["id"],
-            name=dictionary["name"],
-            regenerate_conversations_after_incorrect_step="regenerateSteps"
-            in dictionary.get("actions", []),
-            rate_alternative_responses="generateAndRateAlternativeSteps"
-            in dictionary.get("actions", []),
-        )
-
-
-def _create_correct_step() -> StepReasoningVariant:
-    return StepReasoningVariant(
-        id=StepReasoningVariants.CORRECT_STEP_ID, name="Correct"
-    )
-
-
-def _create_neutral_step() -> StepReasoningVariant:
-    return StepReasoningVariant(
-        id=StepReasoningVariants.NEUTRAL_STEP_ID, name="Neutral"
-    )
-
-
-def _create_incorrect_step() -> IncorrectStepReasoningVariant:
-    return IncorrectStepReasoningVariant(
-        id=StepReasoningVariants.INCORRECT_STEP_ID, name="Incorrect"
-    )
+class IncorrectStepActions(Enum):
+    REGENERATE_STEPS = "regenerateSteps"
+    GENERATE_AND_RATE_ALTERNATIVE_STEPS = "generateAndRateAlternativeSteps"
 
 
 @dataclass
@@ -68,18 +19,22 @@ class StepReasoningVariants:
     Currently the options are correct, neutral, and incorrect
     """
 
-    CORRECT_STEP_ID = 0
-    NEUTRAL_STEP_ID = 1
-    INCORRECT_STEP_ID = 2
-
-    correct_step: StepReasoningVariant = field(
-        default_factory=_create_correct_step
+    correct_step: Variant = field(
+        default_factory=lambda: Variant(id=0, name="Correct")
     )
-    neutral_step: StepReasoningVariant = field(
-        default_factory=_create_neutral_step
+    neutral_step: Variant = field(
+        default_factory=lambda: Variant(id=1, name="Neutral")
     )
-    incorrect_step: IncorrectStepReasoningVariant = field(
-        default_factory=_create_incorrect_step
+
+    incorrect_step: VariantWithActions = field(
+        default_factory=lambda: VariantWithActions(
+            id=2,
+            name="Incorrect",
+            _available_actions={
+                action.value for action in IncorrectStepActions
+            },
+            actions=["regenerateSteps"],  # regenerateSteps is on by default
+        )
     )
 
     def asdict(self):
@@ -96,14 +51,12 @@ def from_dict(cls, dictionary: List[Dict[str, Any]]):
         incorrect_step = None
 
         for variant in dictionary:
-            if variant["id"] == cls.CORRECT_STEP_ID:
-                correct_step = StepReasoningVariant(**variant)
-            elif variant["id"] == cls.NEUTRAL_STEP_ID:
-                neutral_step = StepReasoningVariant(**variant)
-            elif variant["id"] == cls.INCORRECT_STEP_ID:
-                incorrect_step = IncorrectStepReasoningVariant.from_dict(
-                    variant
-                )
+            if variant["id"] == 0:
+                correct_step = Variant(**variant)
+            elif variant["id"] == 1:
+                neutral_step = Variant(**variant)
+            elif variant["id"] == 2:
+                incorrect_step = VariantWithActions(**variant)
 
         if not all([correct_step, neutral_step, incorrect_step]):
             raise ValueError("Invalid step reasoning variants")
@@ -162,20 +115,12 @@ def __post_init__(self):
             "This feature is experimental and subject to change.",
         )
 
-    def reset_regenerate_conversations_after_incorrect_step(self):
+    def set_incorrect_step_actions(self, actions: List[IncorrectStepActions]):
         """
-        For live models, the default acation will invoke the model to generate alternatives if a step is marked as incorrect
-        This method will reset the action to not regenerate the conversation
+        Set the actions applied when a step is marked as incorrect, replacing any previously set actions (for live models, e.g. invoking the model to generate alternatives)
         """
-        self.definition.variants.incorrect_step.regenerate_conversations_after_incorrect_step = False
-
-    def reset_rate_alternative_responses(self):
-        """
-        For live models, will require labelers to rate the alternatives generated by the model
-        """
-        self.definition.variants.incorrect_step.rate_alternative_responses = (
-            False
-        )
+        actions_values = [action.value for action in actions]
+        self.definition.variants.incorrect_step.set_actions(actions_values)
 
     def asdict(self) -> Dict[str, Any]:
         return {
diff --git a/libs/labelbox/src/labelbox/schema/tool_building/tool_type_mapping.py b/libs/labelbox/src/labelbox/schema/tool_building/tool_type_mapping.py
@@ -5,12 +5,10 @@
 
 def map_tool_type_to_tool_cls(tool_type_str: str):
     if not ToolType.valid(tool_type_str):
-        raise ValueError(f"Invalid tool type {tool_type_str}")
+        return None
 
     tool_type = ToolType(tool_type_str.lower())
     if tool_type == ToolType.STEP_REASONING:
         return StepReasoningTool
     elif tool_type == ToolType.FACT_CHECKING:
         return FactCheckingTool
-
-    return None
diff --git a/libs/labelbox/src/labelbox/schema/tool_building/variant.py b/libs/labelbox/src/labelbox/schema/tool_building/variant.py
@@ -22,7 +22,8 @@ class VariantWithActions:
     actions: List[str] = field(default_factory=list)
     _available_actions: Set[str] = field(default_factory=set)
 
-    def set_actions(self, actions: Set[str]) -> None:
+    def set_actions(self, actions: List[str]) -> None:
+        self.actions = []
         for action in actions:
             if action in self._available_actions:
                 self.actions.append(action)
@@ -31,8 +32,11 @@ def reset_actions(self) -> None:
         self.actions = []
 
     def asdict(self) -> Dict[str, Any]:
-        return {
+        data = {
             "id": self.id,
             "name": self.name,
-            "actions": list(set(self.actions)),
         }
+        if len(self.actions) > 0:
+            data["actions"] = self.actions
+
+        return data
diff --git a/libs/labelbox/tests/integration/test_ontology.py b/libs/labelbox/tests/integration/test_ontology.py
@@ -5,6 +5,7 @@
 
 from labelbox import MediaType, OntologyBuilder, Tool
 from labelbox.orm.model import Entity
+from labelbox.schema.tool_building.fact_checking_tool import FactCheckingTool
 from labelbox.schema.tool_building.step_reasoning_tool import StepReasoningTool
 
 
@@ -339,7 +340,7 @@ def test_step_reasoning_ontology(chat_evaluation_ontology):
         {
             "id": 2,
             "name": "Incorrect",
-            "actions": [],
+            "actions": ["regenerateSteps"],
         },
     ]
     assert step_reasoning_tool["definition"]["version"] == 1
@@ -364,6 +365,59 @@ def test_step_reasoning_ontology(chat_evaluation_ontology):
         {
             "id": 2,
             "name": "Incorrect",
-            "actions": [],
+            "actions": ["regenerateSteps"],
+        },
+    ]
+
+
+def test_fact_checking_ontology(chat_evaluation_ontology):
+    ontology = chat_evaluation_ontology
+    fact_checking = None
+    for tool in ontology.normalized["tools"]:
+        if tool["tool"] == "fact-checking":
+            fact_checking = tool
+            break
+    assert fact_checking is not None
+    assert fact_checking["definition"]["variants"] == [
+        {"id": 0, "name": "Accurate"},
+        {"id": 1, "name": "Inaccurate"},
+        {"id": 2, "name": "Disputed"},
+        {"id": 3, "name": "Unsupported", "actions": ["writeJustification"]},
+        {
+            "id": 4,
+            "name": "Can't confidently assess",
+            "actions": ["writeJustification"],
+        },
+        {
+            "id": 5,
+            "name": "No factual information",
+            "actions": ["writeJustification"],
+        },
+    ]
+    assert fact_checking["definition"]["version"] == 1
+    assert fact_checking["schemaNodeId"] is not None
+    assert fact_checking["featureSchemaId"] is not None
+
+    fact_checking = None
+    for tool in ontology.tools():
+        if isinstance(tool, FactCheckingTool):
+            fact_checking = tool
+            break
+    assert fact_checking is not None
+
+    assert fact_checking.definition.variants.asdict() == [
+        {"id": 0, "name": "Accurate"},
+        {"id": 1, "name": "Inaccurate"},
+        {"id": 2, "name": "Disputed"},
+        {"id": 3, "name": "Unsupported", "actions": ["writeJustification"]},
+        {
+            "id": 4,
+            "name": "Can't confidently assess",
+            "actions": ["writeJustification"],
+        },
+        {
+            "id": 5,
+            "name": "No factual information",
+            "actions": ["writeJustification"],
         },
     ]
diff --git a/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py b/libs/labelbox/tests/unit/test_unit_step_reasoning_tool.py
@@ -1,4 +1,7 @@
-from labelbox.schema.tool_building.step_reasoning_tool import StepReasoningTool
+from labelbox.schema.tool_building.step_reasoning_tool import (
+    IncorrectStepActions,
+    StepReasoningTool,
+)
 
 
 def test_step_reasoning_as_dict_default():
@@ -16,7 +19,7 @@ def test_step_reasoning_as_dict_default():
                 {
                     "id": 2,
                     "name": "Incorrect",
-                    "actions": [],
+                    "actions": ["regenerateSteps"],
                 },
             ],
             "version": 1,
@@ -26,8 +29,12 @@ def test_step_reasoning_as_dict_default():
 
 def test_step_reasoning_as_dict_with_actions():
     tool = StepReasoningTool(name="step reasoning")
-    tool.reset_rate_alternative_responses()
-    tool.reset_regenerate_conversations_after_incorrect_step()
+    tool.set_incorrect_step_actions(
+        [
+            IncorrectStepActions.REGENERATE_STEPS,
+            IncorrectStepActions.GENERATE_AND_RATE_ALTERNATIVE_STEPS,
+        ]
+    )
     assert tool.asdict() == {
         "tool": "step-reasoning",
         "name": "step reasoning",