diff --git a/README.md b/README.md
index 0b1f0f7..1affa88 100644
--- a/README.md
+++ b/README.md
@@ -77,6 +77,11 @@ Welcome to Labelbox Notebooks! These documents are directly linked from our Labe
+ Composite mask export
 Export data
@@ -87,11 +92,6 @@ Welcome to Labelbox Notebooks! These documents are directly linked from our Labe
- Composite mask export
@@ -107,29 +107,29 @@ Welcome to Labelbox Notebooks! These documents are directly linked from our Labe
- Project setup
+ Prompt response projects
 Queue management
- Webhooks
 Multimodal chat project
- Prompt response projects
+ Project setup
+ Webhooks
@@ -146,9 +146,9 @@ Welcome to Labelbox Notebooks! These documents are directly linked from our Labe
- Tiled
+ PDF
 Conversational LLM
@@ -156,19 +156,19 @@ Welcome to Labelbox Notebooks! These documents are directly linked from our Labe
- HTML
+ Text
- Image
+ Video
- PDF
+ Tiled
 Prompt response
@@ -176,24 +176,19 @@ Welcome to Labelbox Notebooks! These documents are directly linked from our Labe
- DICOM
- LLM data generation
+ Audio
- Text
+ Offline multimodal chat evaluation
- Audio
+ Image
 Conversational
@@ -201,9 +196,19 @@ Welcome to Labelbox Notebooks! These documents are directly linked from our Labe
- Video
+ LLM data generation
+ HTML
+ DICOM
@@ -220,29 +225,29 @@ Welcome to Labelbox Notebooks! These documents are directly linked from our Labe
- Import YOLOv8 annotations
- Meta SAM video
+ Huggingface custom embeddings
 Meta SAM
+ Meta SAM video
 Langchain
- Huggingface custom embeddings
+ Import YOLOv8 annotations
@@ -259,25 +264,25 @@ Welcome to Labelbox Notebooks! These documents are directly linked from our Labe
- Custom metrics demo
+ Custom metrics basics
 Model slices
- Custom metrics basics
 Model predictions to project
+ Custom metrics demo
@@ -292,21 +297,6 @@ Welcome to Labelbox Notebooks! These documents are directly linked from our Labe
- Video predictions
- HTML predictions
- Geospatial predictions
 Conversational predictions
@@ -317,21 +307,36 @@ Welcome to Labelbox Notebooks! These documents are directly linked from our Labe
- Conversational LLM predictions
 PDF predictions
+ HTML predictions
 Image predictions
+ Geospatial predictions
+ Video predictions
+ Conversational LLM predictions
diff --git a/annotation_import/offline_multimodal_chat_evaluation.ipynb b/annotation_import/offline_multimodal_chat_evaluation.ipynb
new file mode 100644
index 0000000..2eebec8
--- /dev/null
+++ b/annotation_import/offline_multimodal_chat_evaluation.ipynb
@@ -0,0 +1,301 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {},
+ "cells": [
+ {
+ "metadata": {},
+ "source": [
+ "# Offline multimodal chat evaluation annotation import\n",
+ "\n",
+ "This notebook provides examples of each annotation type supported by the offline multimodal chat evalution project and walks through the complete process of importing annotations as prelabels (model assisted labeling) or ground truth."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Set up"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "%pip install -q \"labelbox[data]\"",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Replace with your API key\n",
+ "\n",
+ "Replace the value of `API_KEY` with a valid [API key]([ref:create-api-key](https://docs.labelbox.com/reference/create-api-key) to connect to the Labelbox client."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "API_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
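+ {
+ "metadata": {},
+ "source": [
+ "As an alternative to hardcoding the key, you can read it from an environment variable. A minimal sketch, assuming you exported the key under the (example) name `LABELBOX_API_KEY`:"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "import os\n\n# Read the API key from an environment variable instead of hardcoding it\n# (the variable name LABELBOX_API_KEY is just an example)\nAPI_KEY = os.environ.get(\"LABELBOX_API_KEY\", API_KEY)\nclient = lb.Client(api_key=API_KEY)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },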
+ {
+ "metadata": {},
+ "source": "from labelbox.types import (\n Label,\n MessageEvaluationTaskAnnotation,\n MessageInfo,\n MessageMultiSelectionTask,\n MessageRankingTask,\n MessageSingleSelectionTask,\n OrderedMessageInfo,\n)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Supported annotations for multimodal chat evaluation"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Tool: Message ranking"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "message_ranking_annotation = lb_types.MessageEvaluationTaskAnnotation(\n name=\"Message ranking\",\n value=MessageRankingTask(\n parent_message_id=\"clxfznjb800073b6v43ppx9ca\",\n ranked_messages=[\n OrderedMessageInfo(\n message_id=\"clxfzocbm00083b6v8vczsept\",\n model_config_name=\"GPT 4 with temperature 0.7\",\n order=1,\n ),\n OrderedMessageInfo(\n message_id=\"clxfzocbm00093b6vx4ndisub\",\n model_config_name=\"GPT 5\",\n order=2,\n ),\n ],\n ),\n)\n\nmessage_ranking_annotation_ndjson = {\n \"name\": \"model output multi ranking\",\n \"messageEvaluationTask\": {\n \"format\": \"message-ranking\",\n \"data\": {\n \"parentMessageId\":\n \"clxfzhair00023b6vb607bqo6\",\n \"rankedMessages\": [\n {\n \"messageId\": \"clxfzi3r400063b6vuaeajylo\",\n \"modelConfigName\": \"GPT 4 with temperature 0.7\",\n \"order\": 2,\n },\n {\n \"messageId\": \"clxfzi3r400053b6vm5udpdgo\",\n \"modelConfigName\": \"GPT 5\",\n \"order\": 1,\n },\n ],\n },\n },\n}",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Tool: Single message selection"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "single_message_selection_annotation = lb_types.MessageEvaluationTaskAnnotation(\n name=\"Single message selection\",\n value=MessageSingleSelectionTask(\n message_id=\"clxfzi3r400053b6vm5udpdgo\",\n parent_message_id=\"clxfzhair00023b6vb607bqo6\",\n model_config_name=\"GPT 4 with temperature 0.7\",\n ),\n)\nsingle_message_selection_annotation_ndjson = {\n \"name\": \"Single message selection\",\n \"messageEvaluationTask\": {\n \"format\": \"message-single-selection\",\n \"data\": {\n \"messageId\": \"clxfzi3r400053b6vm5udpdgo\",\n \"parentMessageId\": \"clxfzhair00023b6vb607bqo6\",\n \"modelConfigName\": \"GPT 4 with temperature 0.7\",\n },\n },\n}",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Tool: Multiple message selection"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "multiple_message_selection_annotation = (\n lb_types.MessageEvaluationTaskAnnotation(\n name=\"Multi message selection\",\n value=MessageMultiSelectionTask(\n parent_message_id=\"clxfzhair00023b6vb607bqo6\",\n selected_messages=[\n MessageInfo(\n message_id=\"clxfzi3r400063b6vuaeajylo\",\n model_config_name=\"GPT 4 with temperature 0.7\",\n ),\n MessageInfo(\n message_id=\"clxfzi3r400053b6vm5udpdgo\",\n model_config_name=\"GPT 5\",\n ),\n ],\n ),\n ))\nmultiple_message_selection_annotation_ndjson = {\n \"name\": \"Multi message selection\",\n \"messageEvaluationTask\": {\n \"format\": \"message-multi-selection\",\n \"data\": {\n \"parentMessageId\":\n \"clxfzhair00023b6vb607bqo6\",\n \"selectedMessages\": [\n {\n \"messageId\": \"clxfzi3r400063b6vuaeajylo\",\n \"modelConfigName\": \"GPT 4 with temperature 0.7\",\n },\n {\n \"messageId\": \"clxfzi3r400053b6vm5udpdgo\",\n \"modelConfigName\": \"GPT 5\",\n },\n ],\n },\n },\n}",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Radio (single-choice)"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "radio_annotation = lb_types.ClassificationAnnotation(\n name=\"Choose the best response\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"Response B\")),\n)\n\nradio_annotation_ndjson = {\n \"name\": \"Choose the best response\",\n \"answer\": {\n \"name\": \"Response B\"\n },\n}",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Free-form text"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "text_annotation = lb_types.ClassificationAnnotation(\n name=\"Provide a reason for your choice\",\n value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"Provide a reason for your choice\",\n \"answer\": \"This is the more concise answer\",\n}",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Classification: Checklist (multi-choice)"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n message_id=\"clxfznjb800073b6v43ppx9ca\", # Message specific annotation\n)\n\nchecklist_annotation_ndjson = {\n \"name\": \"checklist_convo\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n \"messageId\": \"clxfznjb800073b6v43ppx9ca\",\n}",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 1: Import data rows into Catalog"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "mmc_asset = \"https://storage.googleapis.com/labelbox-datasets/conversational_model_evaluation_sample/offline-model-chat-evaluation.json\"\nglobal_key = \"offline-multimodal_chat_evaluation\"\n\n# Upload data rows\nconvo_data = {\"row_data\": mmc_asset, \"global_key\": global_key}\n\n# Create a dataset\ndataset = client.create_dataset(name=\"offline-multimodal_chat_evaluation_demo\")\n# Create a datarow\ntask = dataset.create_data_rows([convo_data])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 2: Create/select an Ontology"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "ontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(\n tool=lb.Tool.Type.MESSAGE_SINGLE_SELECTION,\n name=\"Single message selection\",\n ),\n lb.Tool(\n tool=lb.Tool.Type.MESSAGE_MULTI_SELECTION,\n name=\"Multi message selection\",\n ),\n lb.Tool(tool=lb.Tool.Type.MESSAGE_RANKING, name=\"Message ranking\"),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n scope=lb.Classification.Scope.GLOBAL,\n name=\"Choose the best response\",\n options=[\n lb.Option(value=\"Response A\"),\n lb.Option(value=\"Response B\"),\n lb.Option(value=\"Tie\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"Provide a reason for your choice\",\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n scope=lb.Classification.Scope.INDEX,\n name=\"checklist_convo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n ],\n)\n# Create ontology\nontology = client.create_ontology(\n \"MMC ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Conversational,\n ontology_kind=lb.OntologyKind.ModelEvaluation,\n)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 3: Create a labeling project"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "# Create Labelbox project\nproject = client.create_offline_model_evaluation_project(\n name=\"Offline MMC Import Demo\",\n description=\"\", # optional\n media_type=lb.MediaType.Conversational,\n)\n\n# Setup your ontology\nproject.connect_ontology(\n ontology) # Connect your ontology and editor to your project",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 4: Send a batch of data rows to the project"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "# Create a batch to send to your project\nbatch = project.create_batch(\n \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 5: Create the annotations payload"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "Python annotation"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n message_ranking_annotation,\n single_message_selection_annotation,\n multiple_message_selection_annotation,\n text_annotation,\n checklist_annotation,\n radio_annotation,\n ],\n ))",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "NDJSON annotation"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "label_ndjson = []\nfor annotations in [\n message_ranking_annotation_ndjson,\n single_message_selection_annotation_ndjson,\n multiple_message_selection_annotation_ndjson,\n text_annotation_ndjson,\n checklist_annotation_ndjson,\n radio_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 6: import annotations to a project as pre-labels or ground truth labels"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Import as prelabels (model assisted labeling)"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
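+ {
+ "metadata": {},
+ "source": [
+ "`create_from_objects` also accepts the NDJSON payload assembled in Step 5. A minimal sketch using the `label_ndjson` list built above:"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "# The same import works with the NDJSON dictionaries instead of\n# the Python annotation objects\nupload_job_ndjson = lb.MALPredictionImport.create_from_objects(\n    client=client,\n    project_id=project.uid,\n    name=f\"mal_job_ndjson-{str(uuid.uuid4())}\",\n    predictions=label_ndjson,\n)\n\nupload_job_ndjson.wait_until_done()\nprint(\"Errors:\", upload_job_ndjson.errors)\nprint(\"Status of uploads: \", upload_job_ndjson.statuses)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },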
+ {
+ "metadata": {},
+ "source": [
+ "### Import as ground truth labels"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "upload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ }
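+ ,
+ {
+ "metadata": {},
+ "source": [
+ "## Optional deletions\n",
+ "\n",
+ "If the project and dataset were created only for this demo, you can uncomment and run the following to remove them:"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "# Uncomment to delete the demo project and dataset\n# project.delete()\n# dataset.delete()",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ }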
+ ]
+}
\ No newline at end of file
diff --git a/requirements-dev.lock b/requirements-dev.lock
index ce6e8f2..d0f5cda 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -13,7 +13,7 @@ annotated-types==0.7.0
# via pydantic
asttokens==2.4.1
# via stack-data
-black==24.4.2
+black==24.8.0
click==8.1.7
# via black
# via typer
@@ -22,15 +22,15 @@ commonmark==0.9.1
databooks==1.3.10
decorator==5.1.1
# via ipython
-executing==2.0.1
+executing==2.1.0
# via stack-data
gitdb==4.0.11
# via gitpython
gitpython==3.1.43
# via databooks
-importlib-metadata==8.2.0
+importlib-metadata==8.5.0
# via yapf
-ipython==8.26.0
+ipython==8.27.0
# via black
jedi==0.19.1
# via ipython
@@ -38,7 +38,7 @@ matplotlib-inline==0.1.7
# via ipython
mypy-extensions==1.0.0
# via black
-numpy==2.0.1
+numpy==2.1.1
# via pandas
packaging==24.1
# via black
@@ -49,7 +49,7 @@ pathspec==0.12.1
# via black
pexpect==4.9.0
# via ipython
-platformdirs==4.2.2
+platformdirs==4.3.2
# via black
# via yapf
prompt-toolkit==3.0.47
@@ -58,16 +58,16 @@ ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.3
# via stack-data
-pydantic==2.8.2
+pydantic==2.9.1
# via databooks
-pydantic-core==2.20.1
+pydantic-core==2.23.3
# via pydantic
pygments==2.18.0
# via ipython
# via rich
python-dateutil==2.9.0.post0
# via pandas
-pytz==2024.1
+pytz==2024.2
# via pandas
rich==12.6.0
# via databooks
@@ -81,7 +81,7 @@ smmap==5.0.1
# via gitdb
stack-data==0.6.3
# via ipython
-tokenize-rt==5.2.0
+tokenize-rt==6.0.0
# via black
tomli==2.0.1
# via databooks
@@ -89,7 +89,7 @@ tomli==2.0.1
traitlets==5.14.3
# via ipython
# via matplotlib-inline
-typer==0.12.3
+typer==0.12.5
# via databooks
typing-extensions==4.12.2
# via databooks
@@ -101,5 +101,5 @@ tzdata==2024.1
wcwidth==0.2.13
# via prompt-toolkit
yapf==0.40.2
-zipp==3.19.2
+zipp==3.20.1
# via importlib-metadata