diff --git a/docs/conf.py b/docs/conf.py
index b9870b87a..a67a44a24 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -16,7 +16,7 @@ project = 'Python SDK reference'
copyright = '2024, Labelbox'
author = 'Labelbox'
-release = '4.0.0'
+release = '5.0.0'
# -- General configuration ---------------------------------------------------

diff --git a/libs/labelbox/CHANGELOG.md b/libs/labelbox/CHANGELOG.md
index ae97086c6..b2d41b56d 100644
--- a/libs/labelbox/CHANGELOG.md
+++ b/libs/labelbox/CHANGELOG.md
@@ -1,4 +1,14 @@
# Changelog
+# Version 5.0.0 (2024-09-16)
+## Updated
+* Set tasks_remaining_count to None in LabelingServiceDashboard if labeling has not started ([#1817](https://github.com/Labelbox/labelbox-python/pull/1817))
+* Improve error messaging when creating an LLM project with an invalid dataset id parameter ([#1799](https://github.com/Labelbox/labelbox-python/pull/1799))
+## Removed
+* BREAKING CHANGE: SDK methods for exports v1 ([#1800](https://github.com/Labelbox/labelbox-python/pull/1800))
+* BREAKING CHANGE: Unused labelbox_v1 serialization package ([#1803](https://github.com/Labelbox/labelbox-python/pull/1803))
+## Fixed
+* Cuid dependencies that cause a crash if numpy is not installed ([#1807](https://github.com/Labelbox/labelbox-python/pull/1807))
+
# Version 4.0.0 (2024-09-10)
## Added
* BREAKING CHANGE for pydantic V1 users: Converted SDK to use pydantic V2([#1738](https://github.com/Labelbox/labelbox-python/pull/1738))

diff --git a/libs/labelbox/pyproject.toml b/libs/labelbox/pyproject.toml
index 58ce3410a..f4c24af59 100644
--- a/libs/labelbox/pyproject.toml
+++ b/libs/labelbox/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "labelbox"
-version = "4.0.0"
+version = "5.0.0"
description = "Labelbox Python API"
authors = [{ name = "Labelbox", email = "engineering@labelbox.com" }]
dependencies = [

diff --git a/libs/labelbox/src/labelbox/__init__.py b/libs/labelbox/src/labelbox/__init__.py
index 1bd0ba967..f9b82b422 100644
--- a/libs/labelbox/src/labelbox/__init__.py
+++ b/libs/labelbox/src/labelbox/__init__.py
@@ -1,6 +1,6 @@
name = "labelbox"
-__version__ = "4.0.0"
+__version__ = "5.0.0"
from labelbox.client import Client
from labelbox.schema.project import Project

diff --git a/libs/labelbox/tests/conftest.py b/libs/labelbox/tests/conftest.py
index 446db396b..6d13a8d83 100644
--- a/libs/labelbox/tests/conftest.py
+++ b/libs/labelbox/tests/conftest.py
@@ -7,7 +7,9 @@ import re
import uuid
import time
+from labelbox.schema.project import Project
import requests
+from labelbox.schema.ontology import Ontology
import pytest
from types import SimpleNamespace
from typing import Type
@@ -23,21 +25,11 @@ from labelbox.schema.queue_mode import QueueMode
from labelbox import Client
-from labelbox import Dataset, DataRow
from labelbox import LabelingFrontend
-from labelbox import OntologyBuilder, Tool, Option, Classification, MediaType
-from labelbox.orm import query
-from labelbox.pagination import PaginatedCollection
+from labelbox import OntologyBuilder, Tool, Option, Classification
from labelbox.schema.annotation_import import LabelImport
-from labelbox.schema.catalog import Catalog
from labelbox.schema.enums import AnnotationImportState
-from labelbox.schema.invite import Invite
-from labelbox.schema.quality_mode import QualityMode
-from labelbox.schema.queue_mode import QueueMode
-from labelbox.schema.user import User
from labelbox.exceptions import LabelboxError
-from contextlib import suppress
-from labelbox import Client
IMG_URL = "https://picsum.photos/200/300.jpg"
"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg" @@ -638,17 +630,22 @@ def organization(client): def configured_project_with_label( client, rand_gen, - image_url, - project, dataset, data_row, wait_for_label_processing, + teardown_helpers, ): """Project with a connected dataset, having one datarow + Project contains an ontology with 1 bbox tool Additionally includes a create_label method for any needed extra labels One label is already created and yielded when using fixture """ + project = client.create_project( + name=rand_gen(str), + queue_mode=QueueMode.Batch, + media_type=MediaType.Image, + ) project._wait_until_data_rows_are_processed( data_row_ids=[data_row.uid], wait_processing_max_seconds=DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS, @@ -666,8 +663,7 @@ def configured_project_with_label( ) yield [project, dataset, data_row, label] - for label in project.labels(): - label.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) def _create_label(project, data_row, ontology, wait_for_label_processing): @@ -736,13 +732,23 @@ def big_dataset(dataset: Dataset): @pytest.fixture def configured_batch_project_with_label( - project, dataset, data_row, wait_for_label_processing + client, + dataset, + data_row, + wait_for_label_processing, + rand_gen, + teardown_helpers, ): """Project with a batch having one datarow Project contains an ontology with 1 bbox tool Additionally includes a create_label method for any needed extra labels One label is already created and yielded when using fixture """ + project = client.create_project( + name=rand_gen(str), + queue_mode=QueueMode.Batch, + media_type=MediaType.Image, + ) data_rows = [dr.uid for dr in list(dataset.data_rows())] project._wait_until_data_rows_are_processed( data_row_ids=data_rows, sleep_interval=3 @@ -757,18 +763,27 @@ def configured_batch_project_with_label( yield [project, dataset, data_row, label] - for label in project.labels(): - label.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture def configured_batch_project_with_multiple_datarows( - project, dataset, data_rows, wait_for_label_processing + client, + dataset, + data_rows, + wait_for_label_processing, + rand_gen, + teardown_helpers, ): """Project with a batch having multiple datarows Project contains an ontology with 1 bbox tool Additionally includes a create_label method for any needed extra labels """ + project = client.create_project( + name=rand_gen(str), + queue_mode=QueueMode.Batch, + media_type=MediaType.Image, + ) global_keys = [dr.global_key for dr in data_rows] batch_name = f"batch {uuid.uuid4()}" @@ -780,26 +795,7 @@ def configured_batch_project_with_multiple_datarows( yield [project, dataset, data_rows] - for label in project.labels(): - label.delete() - - -@pytest.fixture -def configured_batch_project_for_labeling_service( - project, data_row_and_global_key -): - """Project with a batch having multiple datarows - Project contains an ontology with 1 bbox tool - Additionally includes a create_label method for any needed extra labels - """ - global_keys = [data_row_and_global_key[1]] - - batch_name = f"batch {uuid.uuid4()}" - project.create_batch(batch_name, global_keys=global_keys) - - _setup_ontology(project) - - yield project + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) # NOTE this is nice heuristics, also there is this logic _wait_until_data_rows_are_processed in Project @@ -1062,7 +1058,7 @@ def 
@@ -1062,7 +1058,7 @@ def project_with_empty_ontology(project):
@pytest.fixture
def configured_project_with_complex_ontology(
-    client, initial_dataset, rand_gen, image_url
+    client, initial_dataset, rand_gen, image_url, teardown_helpers
):
    project = client.create_project(
        name=rand_gen(str),
@@ -1127,7 +1123,7 @@ def configured_project_with_complex_ontology(
    project.setup(editor, ontology.asdict())
    yield [project, data_row]
-    project.delete()
+    teardown_helpers.teardown_project_labels_ontology_feature_schemas(project)
@pytest.fixture
@@ -1147,12 +1143,13 @@ def valid_model_id():
@pytest.fixture
def requested_labeling_service(
-    rand_gen,
-    live_chat_evaluation_project_with_new_dataset,
-    chat_evaluation_ontology,
-    model_config,
+    rand_gen, client, chat_evaluation_ontology, model_config, teardown_helpers
):
-    project = live_chat_evaluation_project_with_new_dataset
+    project_name = f"test-model-evaluation-project-{rand_gen(str)}"
+    dataset_name = f"test-model-evaluation-dataset-{rand_gen(str)}"
+    project = client.create_model_evaluation_project(
+        name=project_name, dataset_name=dataset_name, data_row_count=1
+    )
    project.connect_ontology(chat_evaluation_ontology)
    project.upsert_instructions("tests/integration/media/sample_pdf.pdf")
@@ -1164,3 +1161,105 @@ def requested_labeling_service(
    labeling_service.request()
    yield project, project.get_labeling_service()
+
+    teardown_helpers.teardown_project_labels_ontology_feature_schemas(project)
+
+
+class TearDownHelpers:
+    @staticmethod
+    def teardown_project_labels_ontology_feature_schemas(project: Project):
+        """
+        Call this function to release the project, its labels, its ontology and the feature schemas in fixture teardown
+
+        NOTE: exception handling is not required as this is a fixture teardown
+        """
+        ontology = project.ontology()
+        ontology_id = ontology.uid
+        client = project.client
+        classification_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+        ]
+        tool_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["tools"]
+        ]
+
+        feature_schema_ids = (
+            classification_feature_schema_ids + tool_feature_schema_ids
+        )
+        labels = list(project.labels())
+        for label in labels:
+            label.delete()
+
+        project.delete()
+        client.delete_unused_ontology(ontology_id)
+        for feature_schema_id in feature_schema_ids:
+            try:
+                client.delete_unused_feature_schema(feature_schema_id)
+            except LabelboxError as e:
+                print(
+                    f"Failed to delete feature schema {feature_schema_id}: {e}"
+                )
+
+    @staticmethod
+    def teardown_ontology_feature_schemas(ontology: Ontology):
+        """
+        Call this function to release the ontology and its feature schemas in fixture teardown
+
+        NOTE: exception handling is not required as this is a fixture teardown
+        """
+        ontology_id = ontology.uid
+        client = ontology.client
+        classification_feature_schema_ids = [
+            feature["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+        ] + [
+            option["featureSchemaId"]
+            for feature in ontology.normalized["classifications"]
+            for option in feature.get("options", [])
+        ]
+
+        tool_feature_schema_ids = (
+            [
+                feature["featureSchemaId"]
+                for feature in ontology.normalized["tools"]
+            ]
+            + [
+                classification["featureSchemaId"]
+                for tool in ontology.normalized["tools"]
+                for classification in tool.get("classifications", [])
+            ]
+            + [
+                option["featureSchemaId"]
+                for tool in ontology.normalized["tools"]
+                for classification in tool.get("classifications", [])
+                for option in classification.get("options", [])
+            ]
+        )
+
+        feature_schema_ids = (
+            classification_feature_schema_ids + tool_feature_schema_ids
+        )
+
+        client.delete_unused_ontology(ontology_id)
+        for feature_schema_id in feature_schema_ids:
+            try:
+                client.delete_unused_feature_schema(feature_schema_id)
+            except LabelboxError as e:
+                print(
+                    f"Failed to delete feature schema {feature_schema_id}: {e}"
+                )
+
+
+class ModuleTearDownHelpers(TearDownHelpers): ...
+
+
+@pytest.fixture
+def teardown_helpers():
+    return TearDownHelpers()
+
+
+@pytest.fixture(scope="module")
+def module_teardown_helpers():
+    return TearDownHelpers()

diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py
index 39cede0bb..2342a759a 100644
--- a/libs/labelbox/tests/data/annotation_import/conftest.py
+++ b/libs/labelbox/tests/data/annotation_import/conftest.py
@@ -1,5 +1,5 @@
-import itertools
import uuid
+from typing import Union
from labelbox.schema.model_run import ModelRun
from labelbox.schema.ontology import Ontology
@@ -14,7 +14,6 @@ from typing import Tuple, Type
from labelbox.schema.annotation_import import LabelImport, AnnotationImportState
from pytest import FixtureRequest
-from contextlib import suppress
"""
The main fixtures of this library are configured_project and configured_project_by_global_key. Both fixtures generate data rows with a parametrize media type. They create the amount of data rows equal to the DATA_ROW_COUNT variable below.
The data rows are generated with a factory fixture that returns a function that allows you to pass a global key. The ontologies are generated normalized and based on the MediaType given (i.e. only features supported by MediaType are created). This ontology is later used to obtain the correct annotations with the prediction_id_mapping and corresponding inferences. Each data row will have all possible annotations attached supported for the MediaType.
@@ -154,6 +153,22 @@ def llm_human_preference_data_row(global_key):
    return llm_human_preference_data_row
+@pytest.fixture(scope="module")
+def mmc_data_row_url():
+    return "https://storage.googleapis.com/labelbox-datasets/conversational_model_evaluation_sample/offline-model-chat-evaluation.json"
+
+
+@pytest.fixture(scope="module", autouse=True)
+def offline_model_evaluation_data_row_factory(mmc_data_row_url: str):
+    def offline_model_evaluation_data_row(global_key: str):
+        return {
+            "row_data": mmc_data_row_url,
+            "global_key": global_key,
+        }
+
+    return offline_model_evaluation_data_row
+
+
@pytest.fixture(scope="module", autouse=True)
def data_row_json_by_media_type(
    audio_data_row_factory,
@@ -165,6 +180,7 @@ def data_row_json_by_media_type(
    document_data_row_factory,
    text_data_row_factory,
    video_data_row_factory,
+    offline_model_evaluation_data_row_factory,
):
    return {
        MediaType.Audio: audio_data_row_factory,
@@ -176,6 +192,7 @@ def data_row_json_by_media_type(
        MediaType.Document: document_data_row_factory,
        MediaType.Text: text_data_row_factory,
        MediaType.Video: video_data_row_factory,
+        OntologyKind.ModelEvaluation: offline_model_evaluation_data_row_factory,
    }
@@ -347,6 +364,26 @@ def normalized_ontology_by_media_type():
        ],
    }
+    radio_index = {
+        "required": False,
+        "instructions": "radio_index",
+        "name": "radio_index",
+        "type": "radio",
+        "scope": "index",
+        "options": [
+            {
+                "label": "first_radio_answer",
+                "value": "first_radio_answer",
+                "options": [],
+            },
+            {
+                "label": "second_radio_answer",
+                "value": "second_radio_answer",
+                "options": [],
+            },
+        ],
+    }
+
    prompt_text = {
        "instructions": "prompt-text",
        "name": "prompt-text",
@@ -405,6 +442,27 @@ def normalized_ontology_by_media_type():
        "type": "response-text",
    }
+    message_single_selection_task = {
+        "required": False,
+        "name": "message-single-selection",
+        "tool": "message-single-selection",
+        "classifications": [],
+    }
+
+    message_multi_selection_task = {
+        "required": False,
+        "name": "message-multi-selection",
+        "tool": "message-multi-selection",
+        "classifications": [],
+    }
+
+    message_ranking_task = {
+        "required": False,
+        "name": "message-ranking",
+        "tool": "message-ranking",
+        "classifications": [],
+    }
+
    return {
        MediaType.Image: {
            "tools": [
@@ -518,6 +576,21 @@ def normalized_ontology_by_media_type():
                response_checklist,
            ],
        },
+        OntologyKind.ModelEvaluation: {
+            "tools": [
+                message_single_selection_task,
+                message_multi_selection_task,
+                message_ranking_task,
+            ],
+            "classifications": [
+                radio,
+                checklist,
+                free_form_text,
+                radio_index,
+                checklist_index,
+                free_form_text_index,
+            ],
+        },
        "all": {
            "tools": [
                bbox_tool,
@@ -697,6 +770,45 @@ def _create_prompt_response_project(
    return prompt_response_project, ontology
+def _create_offline_mmc_project(
+    client: Client, rand_gen, data_row_json, normalized_ontology
+) -> Tuple[Project, Ontology, Dataset]:
+    dataset = client.create_dataset(name=rand_gen(str))
+
+    project = client.create_offline_model_evaluation_project(
+        name=f"offline-mmc-{rand_gen(str)}",
+    )
+
+    ontology = client.create_ontology(
+        name=f"offline-mmc-{rand_gen(str)}",
+        normalized=normalized_ontology,
+        media_type=MediaType.Conversational,
+        ontology_kind=OntologyKind.ModelEvaluation,
+    )
+
+    project.connect_ontology(ontology)
+
+    data_row_data = [
+        data_row_json(rand_gen(str)) for _ in range(DATA_ROW_COUNT)
+    ]
+
+    task = dataset.create_data_rows(data_row_data)
+    task.wait_till_done()
+    global_keys = [row["global_key"] for row in task.result]
+    data_row_ids = [row["id"] for row in task.result]
+
+    project.create_batch(
+        rand_gen(str),
+        data_row_ids,  # sample of data row objects
+        5,  # priority between 1(Highest) - 5(lowest)
+    )
+    project.data_row_ids = data_row_ids
+    project.data_row_data = data_row_data
+    project.global_keys = global_keys
+
+    return project, ontology, dataset
+
+
def _create_project(
    client: Client,
    rand_gen,
@@ -719,7 +831,6 @@ def _create_project(
    )
    project.connect_ontology(ontology)
-
    data_row_data = []
    for _ in range(DATA_ROW_COUNT):
@@ -752,10 +863,14 @@ def configured_project(
    normalized_ontology_by_media_type,
    export_v2_test_helpers,
    llm_prompt_response_creation_dataset_with_data_row,
+    teardown_helpers,
):
    """Configure project for test. Request.param will contain the media type
    if not present will use Image MediaType. The project will have 10 data rows."""
-    media_type = getattr(request, "param", MediaType.Image)
+    media_type: Union[MediaType, OntologyKind] = getattr(
+        request, "param", MediaType.Image
+    )
+
    dataset = None
    if (
@@ -778,6 +893,13 @@ def configured_project(
            media_type,
            normalized_ontology_by_media_type,
        )
+    elif media_type == OntologyKind.ModelEvaluation:
+        project, ontology, dataset = _create_offline_mmc_project(
+            client,
+            rand_gen,
+            data_row_json_by_media_type[media_type],
+            normalized_ontology_by_media_type[media_type],
+        )
    else:
        project, ontology, dataset = _create_project(
            client,
@@ -789,13 +911,11 @@ def configured_project(
    yield project
-    project.delete()
+    teardown_helpers.teardown_project_labels_ontology_feature_schemas(project)
    if dataset:
        dataset.delete()
-    client.delete_unused_ontology(ontology.uid)
-
@pytest.fixture()
def configured_project_by_global_key(
@@ -805,6 +925,7 @@ def configured_project_by_global_key(
    request: FixtureRequest,
    normalized_ontology_by_media_type,
    export_v2_test_helpers,
+    teardown_helpers,
):
    """Does the same thing as configured project but with global keys focus."""
@@ -830,6 +951,13 @@ def configured_project_by_global_key(
            media_type,
            normalized_ontology_by_media_type,
        )
+    elif media_type == OntologyKind.ModelEvaluation:
+        project, ontology, dataset = _create_offline_mmc_project(
+            client,
+            rand_gen,
+            data_row_json_by_media_type[media_type],
+            normalized_ontology_by_media_type[media_type],
+        )
    else:
        project, ontology, dataset = _create_project(
            client,
@@ -841,13 +969,11 @@ def configured_project_by_global_key(
    yield project
-    project.delete()
+    teardown_helpers.teardown_project_labels_ontology_feature_schemas(project)
    if dataset:
        dataset.delete()
-    client.delete_unused_ontology(ontology.uid)
-
@pytest.fixture(scope="module")
def module_project(
@@ -856,6 +982,7 @@ def module_project(
    data_row_json_by_media_type,
    request: FixtureRequest,
    normalized_ontology_by_media_type,
+    module_teardown_helpers,
):
    """Generates a image project that scopes to the test module(file).
    Used to reduce api calls."""
@@ -889,13 +1016,13 @@ def module_project(
    yield project
-    project.delete()
+    module_teardown_helpers.teardown_project_labels_ontology_feature_schemas(
+        project
+    )
    if dataset:
        dataset.delete()
-    client.delete_unused_ontology(ontology.uid)
-
@pytest.fixture
def prediction_id_mapping(request, normalized_ontology_by_media_type):
@@ -992,6 +1119,31 @@ def prediction_id_mapping(request, normalized_ontology_by_media_type):
    return base_annotations
+@pytest.fixture
+def mmc_example_data_row_message_ids(mmc_data_row_url: str):
+    data_row_content = requests.get(mmc_data_row_url).json()
+
+    human_id = next(
+        actor_id
+        for actor_id, actor_metadata in data_row_content["actors"].items()
+        if actor_metadata["role"] == "human"
+    )
+
+    return {
+        message_id: [
+            {
+                "id": child_msg_id,
+                "model_config_name": data_row_content["actors"][
+                    data_row_content["messages"][child_msg_id]["actorId"]
+                ]["metadata"]["modelConfigName"],
+            }
+            for child_msg_id in message_metadata["childMessageIds"]
+        ]
+        for message_id, message_metadata in data_row_content["messages"].items()
+        if message_metadata["actorId"] == human_id
+    }
+
+
# Each inference represents a feature type that adds to the base annotation created with prediction_id_mapping
@pytest.fixture
def polygon_inference(prediction_id_mapping):
@@ -1307,6 +1459,31 @@ def checklist_inference_index(prediction_id_mapping):
    return checklists
+@pytest.fixture
+def checklist_inference_index_mmc(
+    prediction_id_mapping, mmc_example_data_row_message_ids
+):
+    checklists = []
+    for feature in prediction_id_mapping:
+        if "checklist_index" not in feature:
+            return None
+        checklist = feature["checklist_index"].copy()
+        checklist.update(
+            {
+                "answers": [
+                    {"name": "first_checklist_answer"},
+                    {"name": "second_checklist_answer"},
+                ],
+                "messageId": next(
+                    iter(mmc_example_data_row_message_ids.keys())
+                ),
+            }
+        )
+        del checklist["tool"]
+        checklists.append(checklist)
+    return checklists
+
+
@pytest.fixture
def prompt_text_inference(prediction_id_mapping):
    prompt_texts = []
@@ -1337,6 +1514,45 @@ def radio_response_inference(prediction_id_mapping):
    return response_radios
+@pytest.fixture
+def radio_inference(prediction_id_mapping):
+    radios = []
+    for feature in prediction_id_mapping:
+        if "radio" not in feature:
+            continue
+        radio = feature["radio"].copy()
+        radio.update(
+            {
+                "answer": {"name": "first_radio_answer"},
+            }
+        )
+        del radio["tool"]
+        radios.append(radio)
+    return radios
+
+
+@pytest.fixture
+def radio_inference_index_mmc(
+    prediction_id_mapping, mmc_example_data_row_message_ids
+):
+    radios = []
+    for feature in prediction_id_mapping:
+        if "radio_index" not in feature:
+            continue
+        radio = feature["radio_index"].copy()
+        radio.update(
+            {
+                "answer": {"name": "first_radio_answer"},
+                "messageId": next(
+                    iter(mmc_example_data_row_message_ids.keys())
+                ),
+            }
+        )
+        del radio["tool"]
+        radios.append(radio)
+    return radios
+
+
@pytest.fixture
def checklist_response_inference(prediction_id_mapping):
    response_checklists = []
@@ -1406,6 +1622,28 @@ def text_inference_index(prediction_id_mapping):
    return texts
+@pytest.fixture
+def text_inference_index_mmc(
+    prediction_id_mapping, mmc_example_data_row_message_ids
+):
+    texts = []
+    for feature in prediction_id_mapping:
+        if "text_index" not in feature:
+            continue
+        text = feature["text_index"].copy()
+        text.update(
+            {
+                "answer": "free form text...",
+                "messageId": next(
+                    iter(mmc_example_data_row_message_ids.keys())
+                ),
+            }
+        )
+        del text["tool"]
+        texts.append(text)
+    return texts
+
+
@pytest.fixture
def video_checklist_inference(prediction_id_mapping):
    checklists = []
@@ -1441,6 +1679,118 @@ def video_checklist_inference(prediction_id_mapping):
    return checklists
+@pytest.fixture
+def message_single_selection_inference(
+    prediction_id_mapping, mmc_example_data_row_message_ids
+):
+    some_parent_id, some_child_ids = next(
+        iter(mmc_example_data_row_message_ids.items())
+    )
+
+    res = []
+    for feature in prediction_id_mapping:
+        if "message-single-selection" not in feature:
+            continue
+        selection = feature["message-single-selection"].copy()
+        selection.update(
+            {
+                "messageEvaluationTask": {
+                    "format": "message-single-selection",
+                    "data": {
+                        "messageId": some_child_ids[0]["id"],
+                        "parentMessageId": some_parent_id,
+                        "modelConfigName": some_child_ids[0][
+                            "model_config_name"
+                        ],
+                    },
+                }
+            }
+        )
+        del selection["tool"]
+        res.append(selection)
+
+    return res
+
+
+@pytest.fixture
+def message_multi_selection_inference(
+    prediction_id_mapping, mmc_example_data_row_message_ids
+):
+    some_parent_id, some_child_ids = next(
+        iter(mmc_example_data_row_message_ids.items())
+    )
+
+    res = []
+    for feature in prediction_id_mapping:
+        if "message-multi-selection" not in feature:
+            continue
+        selection = feature["message-multi-selection"].copy()
+        selection.update(
+            {
+                "messageEvaluationTask": {
+                    "format": "message-multi-selection",
+                    "data": {
+                        "parentMessageId": some_parent_id,
+                        "selectedMessages": [
+                            {
+                                "messageId": child_id["id"],
+                                "modelConfigName": child_id[
+                                    "model_config_name"
+                                ],
+                            }
+                            for child_id in some_child_ids
+                        ],
+                    },
+                }
+            }
+        )
+        del selection["tool"]
+        res.append(selection)
+
+    return res
+
+
+@pytest.fixture
+def message_ranking_inference(
+    prediction_id_mapping, mmc_example_data_row_message_ids
+):
+    some_parent_id, some_child_ids = next(
+        iter(mmc_example_data_row_message_ids.items())
+    )
+
+    res = []
+    for feature in prediction_id_mapping:
+        if "message-ranking" not in feature:
+            continue
+        selection = feature["message-ranking"].copy()
+        selection.update(
+            {
+                "messageEvaluationTask": {
+                    "format": "message-ranking",
+                    "data": {
+                        "parentMessageId": some_parent_id,
+                        "rankedMessages": [
+                            {
+                                "messageId": child_id["id"],
+                                "modelConfigName": child_id[
+                                    "model_config_name"
+                                ],
+                                "order": idx,
+                            }
+                            for idx, child_id in enumerate(
+                                some_child_ids, start=1
+                            )
+                        ],
+                    },
+                }
+            }
+        )
+        del selection["tool"]
+        res.append(selection)
+
+    return res
+
+
@pytest.fixture
def annotations_by_media_type(
    polygon_inference,
@@ -1460,6 +1810,13 @@ def annotations_by_media_type(
    checklist_response_inference,
    radio_response_inference,
    text_response_inference,
+    message_single_selection_inference,
+    message_multi_selection_inference,
+    message_ranking_inference,
+    checklist_inference_index_mmc,
+    radio_inference,
+    radio_inference_index_mmc,
+    text_inference_index_mmc,
):
    return {
        MediaType.Audio: [checklist_inference, text_inference],
@@ -1497,6 +1854,17 @@ def annotations_by_media_type(
            checklist_response_inference,
            radio_response_inference,
        ],
+        OntologyKind.ModelEvaluation: [
+            message_single_selection_inference,
+            message_multi_selection_inference,
+            message_ranking_inference,
+            radio_inference,
+            checklist_inference,
+            text_inference,
+            radio_inference_index_mmc,
+            checklist_inference_index_mmc,
+            text_inference_index_mmc,
+        ],
    }
@@ -2166,6 +2534,125 @@ def expected_export_v2_llm_response_creation():
    return expected_annotations
+@pytest.fixture
+def expected_exports_v2_mmc(mmc_example_data_row_message_ids):
+    some_parent_id, some_child_ids = next(
+        iter(mmc_example_data_row_message_ids.items())
+    )
+
+    return {
+        "objects": [
+            {
+                "name": "message-single-selection",
+                "annotation_kind": "MessageSingleSelection",
+                "classifications": [],
+                "selected_message": {
+                    "message_id": some_child_ids[0]["id"],
+                    "model_config_name": some_child_ids[0]["model_config_name"],
+                    "parent_message_id": some_parent_id,
+                },
+            },
+            {
+                "name": "message-multi-selection",
+                "annotation_kind": "MessageMultiSelection",
+                "classifications": [],
+                "selected_messages": {
+                    "messages": [
+                        {
+                            "message_id": child_id["id"],
+                            "model_config_name": child_id["model_config_name"],
+                        }
+                        for child_id in some_child_ids
+                    ],
+                    "parent_message_id": some_parent_id,
+                },
+            },
+            {
+                "name": "message-ranking",
+                "annotation_kind": "MessageRanking",
+                "classifications": [],
+                "ranked_messages": {
+                    "ranked_messages": [
+                        {
+                            "message_id": child_id["id"],
+                            "model_config_name": child_id["model_config_name"],
+                            "order": idx,
+                        }
+                        for idx, child_id in enumerate(some_child_ids, start=1)
+                    ],
+                    "parent_message_id": some_parent_id,
+                },
+            },
+        ],
+        "classifications": [
+            {
+                "name": "radio",
+                "value": "radio",
+                "radio_answer": {
+                    "name": "first_radio_answer",
+                    "value": "first_radio_answer",
+                    "classifications": [],
+                },
+            },
+            {
+                "name": "checklist",
+                "value": "checklist",
+                "checklist_answers": [
+                    {
+                        "name": "first_checklist_answer",
+                        "value": "first_checklist_answer",
+                        "classifications": [],
+                    },
+                    {
+                        "name": "second_checklist_answer",
+                        "value": "second_checklist_answer",
+                        "classifications": [],
+                    },
+                ],
+            },
+            {
+                "name": "text",
+                "value": "text",
+                "text_answer": {"content": "free form text..."},
+            },
+            {
+                "name": "radio_index",
+                "value": "radio_index",
+                "message_id": some_parent_id,
+                "conversational_radio_answer": {
+                    "name": "first_radio_answer",
+                    "value": "first_radio_answer",
+                    "classifications": [],
+                },
+            },
+            {
+                "name": "checklist_index",
+                "value": "checklist_index",
+                "message_id": some_parent_id,
+                "conversational_checklist_answers": [
+                    {
+                        "name": "first_checklist_answer",
+                        "value": "first_checklist_answer",
+                        "classifications": [],
+                    },
+                    {
+                        "name": "second_checklist_answer",
+                        "value": "second_checklist_answer",
+                        "classifications": [],
+                    },
+                ],
+            },
+            {
+                "name": "text_index",
+                "value": "text_index",
+                "message_id": some_parent_id,
+                "conversational_text_answer": {"content": "free form text..."},
+            },
+        ],
+        "relationships": [],
+    }
+
+
@pytest.fixture
def exports_v2_by_media_type(
    expected_export_v2_image,
@@ -2179,6 +2666,7 @@ def exports_v2_by_media_type(
    expected_export_v2_llm_prompt_response_creation,
    expected_export_v2_llm_prompt_creation,
    expected_export_v2_llm_response_creation,
+    expected_exports_v2_mmc,
):
    return {
        MediaType.Image: expected_export_v2_image,
@@ -2192,6 +2680,7 @@ def exports_v2_by_media_type(
        MediaType.LLMPromptResponseCreation: expected_export_v2_llm_prompt_response_creation,
        MediaType.LLMPromptCreation: expected_export_v2_llm_prompt_creation,
        OntologyKind.ResponseCreation: expected_export_v2_llm_response_creation,
+        OntologyKind.ModelEvaluation: expected_exports_v2_mmc,
    }

diff --git a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
index 18385c9d9..9de67bd4e 100644
--- a/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
+++ b/libs/labelbox/tests/data/annotation_import/test_generic_data_types.py
@@ -28,6 +28,78 @@ def validate_iso_format(date_string: str):
    assert parsed_t.second is not None
+@pytest.mark.parametrize(
+    "media_type, data_type_class",
+    [
+        (MediaType.Audio, GenericDataRowData),
+        (MediaType.Html, GenericDataRowData),
+        (MediaType.Image, GenericDataRowData),
+        (MediaType.Text, GenericDataRowData),
+        (MediaType.Video, GenericDataRowData),
+        (MediaType.Conversational, GenericDataRowData),
+        (MediaType.Document, GenericDataRowData),
+        (MediaType.LLMPromptResponseCreation, GenericDataRowData),
+        (MediaType.LLMPromptCreation, GenericDataRowData),
+        (OntologyKind.ResponseCreation, GenericDataRowData),
+        (OntologyKind.ModelEvaluation, GenericDataRowData),
+    ],
+)
+def test_generic_data_row_type_by_data_row_id(
+    media_type,
+    data_type_class,
+    annotations_by_media_type,
+    hardcoded_datarow_id,
+):
+    annotations_ndjson = annotations_by_media_type[media_type]
+    annotations_ndjson = [annotation[0] for annotation in annotations_ndjson]
+
+    label = list(NDJsonConverter.deserialize(annotations_ndjson))[0]
+
+    data_label = Label(
+        data=data_type_class(uid=hardcoded_datarow_id()),
+        annotations=label.annotations,
+    )
+
+    assert data_label.data.uid == label.data.uid
+    assert label.annotations == data_label.annotations
+
+
+@pytest.mark.parametrize(
+    "media_type, data_type_class",
+    [
+        (MediaType.Audio, GenericDataRowData),
+        (MediaType.Html, GenericDataRowData),
+        (MediaType.Image, GenericDataRowData),
+        (MediaType.Text, GenericDataRowData),
+        (MediaType.Video, GenericDataRowData),
+        (MediaType.Conversational, GenericDataRowData),
+        (MediaType.Document, GenericDataRowData),
+        # (MediaType.LLMPromptResponseCreation, GenericDataRowData),
+        # (MediaType.LLMPromptCreation, GenericDataRowData),
+        (OntologyKind.ResponseCreation, GenericDataRowData),
+        (OntologyKind.ModelEvaluation, GenericDataRowData),
+    ],
+)
+def test_generic_data_row_type_by_global_key(
+    media_type,
+    data_type_class,
+    annotations_by_media_type,
+    hardcoded_global_key,
+):
+    annotations_ndjson = annotations_by_media_type[media_type]
+    annotations_ndjson = [annotation[0] for annotation in annotations_ndjson]
+
+    label = list(NDJsonConverter.deserialize(annotations_ndjson))[0]
+
+    data_label = Label(
+        data=data_type_class(global_key=hardcoded_global_key()),
+        annotations=label.annotations,
+    )
+
+    assert data_label.data.global_key == label.data.global_key
+    assert label.annotations == data_label.annotations
+
+
@pytest.mark.parametrize(
    "configured_project, media_type",
    [
@@ -45,6 +117,7 @@ def validate_iso_format(date_string: str):
        ),
        (MediaType.LLMPromptCreation, MediaType.LLMPromptCreation),
        (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
    ],
    indirect=["configured_project"],
)
@@ -121,6 +194,7 @@ def test_import_media_types(
        (MediaType.Document, MediaType.Document),
        (MediaType.Dicom, MediaType.Dicom),
        (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
    ],
    indirect=["configured_project_by_global_key"],
)
@@ -205,6 +279,7 @@ def test_import_media_types_by_global_key(
        ),
        (MediaType.LLMPromptCreation, MediaType.LLMPromptCreation),
        (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
    ],
    indirect=["configured_project"],
)
@@ -239,6 +314,7 @@ def test_import_mal_annotations(
        (MediaType.Document, MediaType.Document),
        (MediaType.Dicom, MediaType.Dicom),
        (OntologyKind.ResponseCreation, OntologyKind.ResponseCreation),
+        (OntologyKind.ModelEvaluation, OntologyKind.ModelEvaluation),
    ],
indirect=["configured_project_by_global_key"], ) diff --git a/libs/labelbox/tests/data/annotation_import/test_model_run.py b/libs/labelbox/tests/data/annotation_import/test_model_run.py index 9eca28429..1174115c5 100644 --- a/libs/labelbox/tests/data/annotation_import/test_model_run.py +++ b/libs/labelbox/tests/data/annotation_import/test_model_run.py @@ -7,13 +7,23 @@ from labelbox import DataSplit, ModelRun -@pytest.mark.order(1) -def test_model_run(client, configured_project_with_label, data_row, rand_gen): +@pytest.fixture +def current_model(client, configured_project_with_label, rand_gen): project, _, _, label = configured_project_with_label - label_id = label.uid ontology = project.ontology() - data = {"name": rand_gen(str), "ontology_id": ontology.uid} - model = client.create_model(data["name"], data["ontology_id"]) + + model = client.create_model(rand_gen(str), ontology.uid) + yield model + + model.delete() + + +def test_model_run( + client, configured_project_with_label, current_model, data_row, rand_gen +): + _, _, _, label = configured_project_with_label + label_id = label.uid + model = current_model name = rand_gen(str) config = {"batch_size": 100, "reruns": None} diff --git a/libs/labelbox/tests/data/export/conftest.py b/libs/labelbox/tests/data/export/conftest.py index 0836c2b9e..0a62f39c8 100644 --- a/libs/labelbox/tests/data/export/conftest.py +++ b/libs/labelbox/tests/data/export/conftest.py @@ -2,7 +2,6 @@ import time import pytest from labelbox.schema.queue_mode import QueueMode -from labelbox.schema.media_type import MediaType from labelbox.schema.labeling_frontend import LabelingFrontend from labelbox.schema.annotation_import import LabelImport, AnnotationImportState @@ -242,7 +241,7 @@ def polygon_inference(prediction_id_mapping): @pytest.fixture def configured_project_with_ontology( - client, initial_dataset, ontology, rand_gen, image_url + client, initial_dataset, ontology, rand_gen, image_url, teardown_helpers ): dataset = initial_dataset project = client.create_project( @@ -264,11 +263,13 @@ def configured_project_with_ontology( ) project.data_row_ids = data_row_ids yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture -def configured_project_without_data_rows(client, ontology, rand_gen): +def configured_project_without_data_rows( + client, ontology, rand_gen, teardown_helpers +): project = client.create_project( name=rand_gen(str), description=rand_gen(str), @@ -279,7 +280,7 @@ def configured_project_without_data_rows(client, ontology, rand_gen): )[0] project.setup(editor, ontology) yield project - project.delete() + teardown_helpers.teardown_project_labels_ontology_feature_schemas(project) @pytest.fixture diff --git a/libs/labelbox/tests/data/test_data_row_metadata.py b/libs/labelbox/tests/data/test_data_row_metadata.py index 9a3690776..891cab9be 100644 --- a/libs/labelbox/tests/data/test_data_row_metadata.py +++ b/libs/labelbox/tests/data/test_data_row_metadata.py @@ -92,21 +92,6 @@ def make_named_metadata(dr_id) -> DataRowMetadata: return metadata -@pytest.mark.skip(reason="broken export v1 api, to be retired soon") -def test_export_empty_metadata( - client, configured_project_with_label, wait_for_data_row_processing -): - project, _, data_row, _ = configured_project_with_label - data_row = wait_for_data_row_processing(client, data_row) - - export_task = project.export(params={"metadata_fields": True}) - export_task.wait_till_done() - stream = export_task.get_buffered_stream() - data_row = 
-    data_row = [data_row.json for data_row in stream][0]
-
-    assert data_row["metadata_fields"] == []
-
-
def test_bulk_export_datarow_metadata(data_row, mdo: DataRowMetadataOntology):
    metadata = make_metadata(data_row.uid)
    mdo.bulk_upsert([metadata])

diff --git a/libs/labelbox/tests/integration/conftest.py b/libs/labelbox/tests/integration/conftest.py
index d37287fe8..c917a6164 100644
--- a/libs/labelbox/tests/integration/conftest.py
+++ b/libs/labelbox/tests/integration/conftest.py
@@ -113,7 +113,7 @@ def configured_project(
@pytest.fixture
def configured_project_with_complex_ontology(
-    client, initial_dataset, rand_gen, image_url
+    client, initial_dataset, rand_gen, image_url, teardown_helpers
):
    project = client.create_project(
        name=rand_gen(str),
@@ -178,7 +178,7 @@ def configured_project_with_complex_ontology(
    project.setup(editor, ontology.asdict())
    yield [project, data_row]
-    project.delete()
+    teardown_helpers.teardown_project_labels_ontology_feature_schemas(project)
@pytest.fixture

diff --git a/libs/labelbox/tests/integration/test_feature_schema.py b/libs/labelbox/tests/integration/test_feature_schema.py
index 1dc940f08..46ec8c067 100644
--- a/libs/labelbox/tests/integration/test_feature_schema.py
+++ b/libs/labelbox/tests/integration/test_feature_schema.py
@@ -58,9 +58,8 @@ def test_throws_an_error_if_feature_schema_to_delete_doesnt_exist(client):
    client.delete_unused_feature_schema("doesntexist")
-def test_updates_a_feature_schema_title(client):
-    tool = client.upsert_feature_schema(point.asdict())
-    feature_schema_id = tool.normalized["featureSchemaId"]
+def test_updates_a_feature_schema_title(client, feature_schema):
+    feature_schema_id = feature_schema.normalized["featureSchemaId"]
    new_title = "new title"
    updated_feature_schema = client.update_feature_schema_title(
        feature_schema_id, new_title
    )
@@ -68,20 +67,16 @@ def test_updates_a_feature_schema_title(client):
    assert updated_feature_schema.normalized["name"] == new_title
-    client.delete_unused_feature_schema(feature_schema_id)
-
def test_throws_an_error_when_updating_a_feature_schema_with_empty_title(
-    client,
+    client, feature_schema
):
-    tool = client.upsert_feature_schema(point.asdict())
+    tool = feature_schema
    feature_schema_id = tool.normalized["featureSchemaId"]
    with pytest.raises(Exception):
        client.update_feature_schema_title(feature_schema_id, "")
-    client.delete_unused_feature_schema(feature_schema_id)
-
def test_throws_an_error_when_updating_not_existing_feature_schema(client):
    with pytest.raises(Exception):
@@ -107,8 +102,8 @@ def test_updates_a_feature_schema(client, feature_schema):
    assert updated_feature_schema.normalized["name"] == "new name"
-def test_does_not_include_used_feature_schema(client):
-    tool = client.upsert_feature_schema(point.asdict())
+def test_does_not_include_used_feature_schema(client, feature_schema):
+    tool = feature_schema
    feature_schema_id = tool.normalized["featureSchemaId"]
    ontology = client.create_ontology_from_feature_schemas(
        name="ontology name",
@@ -120,4 +115,3 @@ def test_does_not_include_used_feature_schema(client):
    assert feature_schema_id not in unused_feature_schemas
    client.delete_unused_ontology(ontology.uid)
-    client.delete_unused_feature_schema(feature_schema_id)