From e98fed3365bdc4440a0d12bfbc054562a314023b Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 24 Jun 2024 10:57:07 -0400 Subject: [PATCH] Revert "Revert "Vb/model offline eval plt 1107 (#1682)" (#1687)" This reverts commit a8dc569d4df4107af75961588a6db37d8bea918d. --- libs/labelbox/src/labelbox/client.py | 179 ++++++++++-------- .../src/labelbox/schema/ontology_kind.py | 16 ++ libs/labelbox/src/labelbox/schema/project.py | 1 + libs/labelbox/tests/integration/conftest.py | 149 ++++++++++++++- .../test_chat_evaluation_ontology_project.py | 13 +- .../test_offline_chat_evaluation_project.py | 26 +++ libs/labelbox/tests/unit/test_project.py | 36 ++++ 7 files changed, 335 insertions(+), 85 deletions(-) create mode 100644 libs/labelbox/tests/integration/test_offline_chat_evaluation_project.py create mode 100644 libs/labelbox/tests/unit/test_project.py diff --git a/libs/labelbox/src/labelbox/client.py b/libs/labelbox/src/labelbox/client.py index a2fb09186..f0abd2bb5 100644 --- a/libs/labelbox/src/labelbox/client.py +++ b/libs/labelbox/src/labelbox/client.py @@ -652,7 +652,8 @@ def delete_model_config(self, id: str) -> bool: params = {"id": id} result = self.execute(query, params) if not result: - raise labelbox.exceptions.ResourceNotFoundError(Entity.ModelConfig, params) + raise labelbox.exceptions.ResourceNotFoundError( + Entity.ModelConfig, params) return result['deleteModelConfig']['success'] def create_dataset(self, @@ -740,85 +741,17 @@ def create_project(self, **kwargs) -> Project: Raises: InvalidAttributeError: If the Project type does not contain any of the attribute names given in kwargs. - """ - - auto_audit_percentage = kwargs.get("auto_audit_percentage") - auto_audit_number_of_labels = kwargs.get("auto_audit_number_of_labels") - if auto_audit_percentage is not None or auto_audit_number_of_labels is not None: - raise ValueError( - "quality_mode must be set instead of auto_audit_percentage or auto_audit_number_of_labels." - ) - - name = kwargs.get("name") - if name is None or not name.strip(): - raise ValueError("project name must be a valid string.") - - queue_mode = kwargs.get("queue_mode") - if queue_mode is QueueMode.Dataset: - raise ValueError( - "Dataset queue mode is deprecated. Please prefer Batch queue mode." - ) - elif queue_mode is QueueMode.Batch: - logger.warning( - "Passing a queue mode of batch is redundant and will soon no longer be supported." - ) - media_type = kwargs.get("media_type") - if media_type and MediaType.is_supported(media_type): - media_type_value = media_type.value - elif media_type: - raise TypeError(f"{media_type} is not a valid media type. Use" - f" any of {MediaType.get_supported_members()}" - " from MediaType. 
Example: MediaType.Image.") - else: - logger.warning( - "Creating a project without specifying media_type" - " through this method will soon no longer be supported.") - media_type_value = None - - ontology_kind = kwargs.pop("ontology_kind", None) - if ontology_kind and OntologyKind.is_supported(ontology_kind): - editor_task_type_value = EditorTaskTypeMapper.to_editor_task_type( - ontology_kind, media_type).value - elif ontology_kind: - raise OntologyKind.get_ontology_kind_validation_error(ontology_kind) - else: - editor_task_type_value = None - - quality_mode = kwargs.get("quality_mode") - if not quality_mode: - logger.info("Defaulting quality mode to Benchmark.") - - data = kwargs - data.pop("quality_mode", None) - if quality_mode is None or quality_mode is QualityMode.Benchmark: - data[ - "auto_audit_number_of_labels"] = BENCHMARK_AUTO_AUDIT_NUMBER_OF_LABELS - data["auto_audit_percentage"] = BENCHMARK_AUTO_AUDIT_PERCENTAGE - elif quality_mode is QualityMode.Consensus: - data[ - "auto_audit_number_of_labels"] = CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS - data["auto_audit_percentage"] = CONSENSUS_AUTO_AUDIT_PERCENTAGE - else: - raise ValueError(f"{quality_mode} is not a valid quality mode.") - - params = {**data} - if media_type_value: - params["media_type"] = media_type_value - if editor_task_type_value: - params["editor_task_type"] = editor_task_type_value - - extra_params = { - Field.String("dataset_name_or_id"): - params.pop("dataset_name_or_id", None), - Field.Boolean("append_to_existing_dataset"): - params.pop("append_to_existing_dataset", None), - Field.Int("data_row_count"): - params.pop("data_row_count", None), - } - extra_params = {k: v for k, v in extra_params.items() if v is not None} - - return self._create(Entity.Project, params, extra_params) + NOTE: the following attributes are used only in chat model evaluation projects: + dataset_name_or_id, append_to_existing_dataset, data_row_count, editor_task_type + They are not used for general projects and not supported in this method + """ + # The following arguments are not supported for general projects, only for chat model evaluation projects + kwargs.pop("dataset_name_or_id", None) + kwargs.pop("append_to_existing_dataset", None) + kwargs.pop("data_row_count", None) + kwargs.pop("editor_task_type", None) + return self._create_project(**kwargs) @overload def create_model_evaluation_project(self, @@ -881,13 +814,99 @@ def create_model_evaluation_project(self, dataset_name_or_id = dataset_name kwargs["media_type"] = MediaType.Conversational - kwargs["ontology_kind"] = OntologyKind.ModelEvaluation kwargs["dataset_name_or_id"] = dataset_name_or_id kwargs["append_to_existing_dataset"] = append_to_existing_dataset kwargs["data_row_count"] = data_row_count + kwargs["editor_task_type"] = EditorTaskType.ModelChatEvaluation.value + + return self._create_project(**kwargs) + + def create_offline_model_evaluation_project(self, **kwargs) -> Project: + """ + Creates a project for offline model evaluation. 
+ Args: + **kwargs: Additional parameters to pass see the create_project method + Returns: + Project: The created project + """ + kwargs[ + "media_type"] = MediaType.Conversational # Only Conversational is supported + kwargs[ + "editor_task_type"] = EditorTaskType.OfflineModelChatEvaluation.value # Special editor task type for offline model evaluation + + # The following arguments are not supported for offline model evaluation + kwargs.pop("dataset_name_or_id", None) + kwargs.pop("append_to_existing_dataset", None) + kwargs.pop("data_row_count", None) return self.create_project(**kwargs) + def _create_project(self, **kwargs) -> Project: + auto_audit_percentage = kwargs.get("auto_audit_percentage") + auto_audit_number_of_labels = kwargs.get("auto_audit_number_of_labels") + if auto_audit_percentage is not None or auto_audit_number_of_labels is not None: + raise ValueError( + "quality_mode must be set instead of auto_audit_percentage or auto_audit_number_of_labels." + ) + + name = kwargs.get("name") + if name is None or not name.strip(): + raise ValueError("project name must be a valid string.") + + queue_mode = kwargs.get("queue_mode") + if queue_mode is QueueMode.Dataset: + raise ValueError( + "Dataset queue mode is deprecated. Please prefer Batch queue mode." + ) + elif queue_mode is QueueMode.Batch: + logger.warning( + "Passing a queue mode of batch is redundant and will soon no longer be supported." + ) + + media_type = kwargs.get("media_type") + if media_type and MediaType.is_supported(media_type): + media_type_value = media_type.value + elif media_type: + raise TypeError(f"{media_type} is not a valid media type. Use" + f" any of {MediaType.get_supported_members()}" + " from MediaType. Example: MediaType.Image.") + else: + logger.warning( + "Creating a project without specifying media_type" + " through this method will soon no longer be supported.") + media_type_value = None + + quality_mode = kwargs.get("quality_mode") + if not quality_mode: + logger.info("Defaulting quality mode to Benchmark.") + + data = kwargs + data.pop("quality_mode", None) + if quality_mode is None or quality_mode is QualityMode.Benchmark: + data[ + "auto_audit_number_of_labels"] = BENCHMARK_AUTO_AUDIT_NUMBER_OF_LABELS + data["auto_audit_percentage"] = BENCHMARK_AUTO_AUDIT_PERCENTAGE + elif quality_mode is QualityMode.Consensus: + data[ + "auto_audit_number_of_labels"] = CONSENSUS_AUTO_AUDIT_NUMBER_OF_LABELS + data["auto_audit_percentage"] = CONSENSUS_AUTO_AUDIT_PERCENTAGE + else: + raise ValueError(f"{quality_mode} is not a valid quality mode.") + + params = {**data} + if media_type_value: + params["media_type"] = media_type_value + + extra_params = { + Field.String("dataset_name_or_id"): + params.pop("dataset_name_or_id", None), + Field.Boolean("append_to_existing_dataset"): + params.pop("append_to_existing_dataset", None), + } + extra_params = {k: v for k, v in extra_params.items() if v is not None} + + return self._create(Entity.Project, params, extra_params) + def get_roles(self) -> List[Role]: """ Returns: diff --git a/libs/labelbox/src/labelbox/schema/ontology_kind.py b/libs/labelbox/src/labelbox/schema/ontology_kind.py index c50368e9c..e33e7cef3 100644 --- a/libs/labelbox/src/labelbox/schema/ontology_kind.py +++ b/libs/labelbox/src/labelbox/schema/ontology_kind.py @@ -26,12 +26,28 @@ def get_ontology_kind_validation_error(cls, ontology_kind): class EditorTaskType(Enum): ModelChatEvaluation = "MODEL_CHAT_EVALUATION" ResponseCreation = "RESPONSE_CREATION" + OfflineModelChatEvaluation = 
"OFFLINE_MODEL_CHAT_EVALUATION" Missing = None @classmethod def is_supported(cls, value): return isinstance(value, cls) + @classmethod + def _missing_(cls, name) -> 'EditorTaskType': + """Handle missing null new task types + Handle upper case names for compatibility with + the GraphQL""" + + if name is None: + return cls.Missing + + for name, member in cls.__members__.items(): + if name == name.upper(): + return member + + return cls.Missing + class EditorTaskTypeMapper: diff --git a/libs/labelbox/src/labelbox/schema/project.py b/libs/labelbox/src/labelbox/schema/project.py index aa51cdc22..a2142ebc5 100644 --- a/libs/labelbox/src/labelbox/schema/project.py +++ b/libs/labelbox/src/labelbox/schema/project.py @@ -121,6 +121,7 @@ class Project(DbObject, Updateable, Deletable): # Bind data_type and allowedMediaTYpe using the GraphQL type MediaType media_type = Field.Enum(MediaType, "media_type", "allowedMediaType") editor_task_type = Field.Enum(EditorTaskType, "editor_task_type") + data_row_count = Field.Int("data_row_count") # Relationships created_by = Relationship.ToOne("User", False, "created_by") diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py index 844933388..612d98122 100644 --- a/libs/labelbox/tests/integration/conftest.py +++ b/libs/labelbox/tests/integration/conftest.py @@ -400,7 +400,10 @@ def chat_evaluation_ontology(client, rand_gen): yield ontology - client.delete_unused_ontology(ontology.uid) + try: + client.delete_unused_ontology(ontology.uid) + except Exception as e: + print(f"Failed to delete ontology {ontology.uid}: {str(e)}") @pytest.fixture @@ -416,6 +419,16 @@ def chat_evaluation_project_create_dataset(client, rand_gen): project.delete() +@pytest.fixture +def offline_chat_evaluation_project(client, rand_gen): + project_name = f"test-offline-model-evaluation-project-{rand_gen(str)}" + project = client.create_offline_model_evaluation_project(name=project_name) + + yield project + + project.delete() + + @pytest.fixture def chat_evaluation_project_append_to_dataset(client, dataset, rand_gen): project_name = f"test-model-evaluation-project-{rand_gen(str)}" @@ -429,6 +442,138 @@ def chat_evaluation_project_append_to_dataset(client, dataset, rand_gen): project.delete() +@pytest.fixture +def offline_conversational_data_row(initial_dataset): + convo_v2_row_data = { + "type": "application/vnd.labelbox.conversational.model-chat-evaluation", + "version": 2, + "actors": { + "clxhs9wk000013b6w7imiz0h8": { + "role": "human", + "metadata": { + "name": "User" + } + }, + "clxhsc6xb00013b6w1awh579j": { + "role": "model", + "metadata": { + "modelConfigId": "5a50d319-56bd-405d-87bb-4442daea0d0f" + } + }, + "clxhsc6xb00023b6wlp0768zs": { + "role": "model", + "metadata": { + "modelConfigId": "1cfc833a-2684-47df-95ac-bb7d9f9e3e1f" + } + } + }, + "messages": { + "clxhs9wk000023b6wrufora3k": { + "actorId": "clxhs9wk000013b6w7imiz0h8", + "content": [{ + "type": "text", + "content": "Hello world" + }], + "childMessageIds": ["clxhscb4z00033b6wukpvmuol"] + }, + "clxhscb4z00033b6wukpvmuol": { + "actorId": "clxhsc6xb00013b6w1awh579j", + "content": [{ + "type": + "text", + "content": + "Hello to you too! 👋 \n\nIt's great to be your guide in the digital world. What can I help you with today? 
😊 \n" + }], + "childMessageIds": ["clxhu2s0900013b6wbv0ndddd"] + }, + "clxhu2s0900013b6wbv0ndddd": { + "actorId": + "clxhs9wk000013b6w7imiz0h8", + "content": [{ + "type": "text", + "content": "Lets some some multi-turn happening" + }], + "childMessageIds": [ + "clxhu4qib00023b6wuep47b1l", "clxhu4qib00033b6wf18az01q" + ] + }, + "clxhu4qib00023b6wuep47b1l": { + "actorId": "clxhsc6xb00013b6w1awh579j", + "content": [{ + "type": + "text", + "content": + "Okay, I'm ready for some multi-turn fun! To make it interesting, how about we try building a story together? \n\n**Here's the beginning:**\n\nThe old, dusty book lay forgotten on the shelf, its leather cover cracked and faded. But as the afternoon sun slanted through the window, a single ray caught a glint of gold on the book's spine. Suddenly...\n\n**Now you tell me what happens!** What does the glint of gold turn out to be? What happens next? 🤔 \n" + }], + "childMessageIds": ["clxhu596m00043b6wvkgahcwz"] + }, + "clxhu4qib00033b6wf18az01q": { + "actorId": "clxhsc6xb00023b6wlp0768zs", + "content": [{ + "type": + "text", + "content": + "Okay, I'm game! Let's have a multi-turn conversation! But to get us started, give me a topic or a question to get us going! 😄 \n\nWhat's on your mind? \n" + }], + "childMessageIds": ["clxhu596m00043b6wvkgahcwz"] + }, + "clxhu596m00043b6wvkgahcwz": { + "actorId": + "clxhs9wk000013b6w7imiz0h8", + "content": [{ + "type": "text", + "content": "Can you generate an image?" + }], + "childMessageIds": [ + "clxhu5hqk00053b6wk21u6xi7", "clxhu5hqk00063b6whl4gofx8" + ] + }, + "clxhu5hqk00053b6wk21u6xi7": { + "actorId": "clxhsc6xb00013b6w1awh579j", + "content": [{ + "type": + "text", + "content": + "I can't generate images directly yet. I'm a text-based AI, so my superpowers are all about words! 🪄 \n\nHowever, I'd love to help you describe an image that fits our story. Tell me: \n\n* **What should the image show?** (For example: The dusty book, the glint of gold, the bookshelf, the room, etc.) \n* **What kind of feeling or atmosphere should the image have?** (Mysterious? Magical? Ominous?) \n\nThe more details you give me, the better I can describe the image! ✨ \n" + }], + "childMessageIds": [] + }, + "clxhu5hqk00063b6whl4gofx8": { + "actorId": "clxhsc6xb00023b6wlp0768zs", + "content": [{ + "type": + "text", + "content": + "I can't *actually* generate images directly. 😔 I'm primarily a text-based AI. \n\nTo help me understand what you'd like to see, tell me: \n\n* **What should be in the image?** Be specific! (e.g., \"A cat wearing a tiny hat\", \"A futuristic cityscape at sunset\")\n* **What style do you imagine?** (e.g., realistic, cartoonish, abstract)\n\nOnce you give me those details, I can try to give you a vivid description that's almost as good as seeing it! 
😊 \n" + }], + "childMessageIds": [] + } + }, + "rootMessageIds": ["clxhs9wk000023b6wrufora3k"] + } + + convo_v2_asset = { + "row_data": convo_v2_row_data, + } + data_row = initial_dataset.create_data_row(convo_v2_asset) + + return data_row + + +@pytest.fixture() +def conversation_data_row(initial_dataset, rand_gen): + data = { + "row_data": + "https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json", + "global_key": + f"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json-{rand_gen(str)}", + } + convo_asset = {"row_data": data} + data_row = initial_dataset.create_data_row(convo_asset) + + return data_row + + def pytest_configure(): pytest.report = defaultdict(int) @@ -456,4 +601,4 @@ def print_perf_summary(): num_of_entries = 10 if len(sorted_dict) >= 10 else len(sorted_dict) slowest_fixtures = [(aaa, sorted_dict[aaa]) for aaa in islice(sorted_dict, num_of_entries)] - print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr) \ No newline at end of file + print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr) diff --git a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py index 4e344cb00..6a41a7a09 100644 --- a/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py +++ b/libs/labelbox/tests/integration/test_chat_evaluation_ontology_project.py @@ -1,13 +1,13 @@ import pytest -from labelbox import OntologyBuilder, Tool from labelbox import MediaType from labelbox.schema.ontology_kind import OntologyKind -from labelbox.schema.labeling_frontend import LabelingFrontend +from labelbox.exceptions import MalformedQueryException def test_create_chat_evaluation_ontology_project( client, chat_evaluation_ontology, - chat_evaluation_project_create_dataset): + chat_evaluation_project_create_dataset, conversation_data_row, + rand_gen): ontology = chat_evaluation_ontology # here we are essentially testing the ontology creation which is a fixture @@ -29,6 +29,13 @@ def test_create_chat_evaluation_ontology_project( assert project.labeling_frontend().name == "Editor" assert project.ontology().name == ontology.name + with pytest.raises(MalformedQueryException, + match="No valid data rows to add to project"): + project.create_batch( + rand_gen(str), + [conversation_data_row.uid], # sample of data row objects + ) + def test_create_chat_evaluation_ontology_project_existing_dataset( client, chat_evaluation_ontology, diff --git a/libs/labelbox/tests/integration/test_offline_chat_evaluation_project.py b/libs/labelbox/tests/integration/test_offline_chat_evaluation_project.py new file mode 100644 index 000000000..d27f4e95e --- /dev/null +++ b/libs/labelbox/tests/integration/test_offline_chat_evaluation_project.py @@ -0,0 +1,26 @@ +import pytest + + +def test_create_offline_chat_evaluation_project(client, rand_gen, + offline_chat_evaluation_project, + chat_evaluation_ontology, + offline_conversational_data_row, + model_config): + project = offline_chat_evaluation_project + assert project + + ontology = chat_evaluation_ontology + project.setup_editor(ontology) + + assert project.labeling_frontend().name == "Editor" + assert project.ontology().name == ontology.name + + batch = project.create_batch( + rand_gen(str), + [offline_conversational_data_row.uid], # sample of data row objects + ) + assert batch + + # Can not add a model config to an 
offline chat evaluation project, since we do not use live models + with pytest.raises(Exception): + project.add_model_config(model_config.uid) diff --git a/libs/labelbox/tests/unit/test_project.py b/libs/labelbox/tests/unit/test_project.py new file mode 100644 index 000000000..9d522e7c0 --- /dev/null +++ b/libs/labelbox/tests/unit/test_project.py @@ -0,0 +1,36 @@ +import pytest +from unittest.mock import MagicMock + +from labelbox.schema.project import Project +from labelbox.schema.ontology_kind import EditorTaskType + + +@pytest.mark.parametrize( + 'api_editor_task_type, expected_editor_task_type', + [(None, EditorTaskType.Missing), + ('MODEL_CHAT_EVALUATION', EditorTaskType.ModelChatEvaluation), + ('RESPONSE_CREATION', EditorTaskType.ResponseCreation), + ('OFFLINE_MODEL_CHAT_EVALUATION', + EditorTaskType.OfflineModelChatEvaluation), + ('NEW_TYPE', EditorTaskType.Missing)]) +def test_project_editor_task_type(api_editor_task_type, + expected_editor_task_type): + client = MagicMock() + project = Project( + client, { + "id": "test", + "name": "test", + "createdAt": "2021-06-01T00:00:00.000Z", + "updatedAt": "2021-06-01T00:00:00.000Z", + "autoAuditNumberOfLabels": 1, + "autoAuditPercentage": 100, + "dataRowCount": 1, + "description": "test", + "editorTaskType": api_editor_task_type, + "lastActivityTime": "2021-06-01T00:00:00.000Z", + "allowedMediaType": "IMAGE", + "queueMode": "BATCH", + "setupComplete": "2021-06-01T00:00:00.000Z", + }) + + assert project.editor_task_type == expected_editor_task_type
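
For reference, the offline_conversational_data_row fixture above uses the v2 model-chat-evaluation conversational payload. Below is a minimal sketch of that structure, condensed from the fixture; the actor and message IDs and the modelConfigId are placeholders, and real payloads need a complete actor and message graph like the one in the fixture.

# Minimal v2 model-chat-evaluation payload (condensed from the fixture above);
# the IDs below are placeholders, not real actor/message identifiers.
convo_v2_row_data = {
    "type": "application/vnd.labelbox.conversational.model-chat-evaluation",
    "version": 2,
    "actors": {
        "actor-human": {"role": "human", "metadata": {"name": "User"}},
        "actor-model": {"role": "model", "metadata": {"modelConfigId": "<model-config-id>"}},
    },
    "messages": {
        "msg-1": {
            "actorId": "actor-human",
            "content": [{"type": "text", "content": "Hello world"}],
            "childMessageIds": ["msg-2"],
        },
        "msg-2": {
            "actorId": "actor-model",
            "content": [{"type": "text", "content": "Hello to you too!"}],
            "childMessageIds": [],
        },
    },
    "rootMessageIds": ["msg-1"],
}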
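
A minimal usage sketch of the reinstated offline-evaluation flow, mirroring test_offline_chat_evaluation_project.py above; the API key, ontology ID, and dataset ID are placeholders, and convo_v2_row_data is the payload sketched above.

from labelbox import Client

client = Client(api_key="<api-key>")  # placeholder credentials

# Media type and editor task type are set internally by
# create_offline_model_evaluation_project; dataset_name_or_id,
# append_to_existing_dataset and data_row_count are ignored for this flow.
project = client.create_offline_model_evaluation_project(
    name="offline-chat-eval-demo")

# Attach an existing chat-evaluation ontology (created as in the
# chat_evaluation_ontology fixture; the ID is a placeholder).
ontology = client.get_ontology("<ontology-id>")
project.setup_editor(ontology)

# Upload a v2 conversational data row and batch it into the project.
dataset = client.get_dataset("<dataset-id>")  # placeholder dataset
data_row = dataset.create_data_row({"row_data": convo_v2_row_data})
project.create_batch("offline-eval-batch", [data_row.uid])

# project.add_model_config(...) is expected to fail for this project type,
# since offline chat evaluation does not use live models (see the
# integration test above).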