From 81c708e2c3340451f8ebfbf2f9c5f1d1b40bc31d Mon Sep 17 00:00:00 2001
From: Gabefire <33893811+Gabefire@users.noreply.github.com>
Date: Mon, 29 Jul 2024 20:20:37 -0500
Subject: [PATCH 1/2] fixed notebook
---
annotation_import/prompt_response.ipynb | 514 +++++++++++++++++++-----
1 file changed, 416 insertions(+), 98 deletions(-)
diff --git a/annotation_import/prompt_response.ipynb b/annotation_import/prompt_response.ipynb
index 935689a..38965ae 100644
--- a/annotation_import/prompt_response.ipynb
+++ b/annotation_import/prompt_response.ipynb
@@ -1,18 +1,16 @@
{
- "nbformat": 4,
- "nbformat_minor": 2,
- "metadata": {},
"cells": [
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
- "
",
- " ",
+ " | \n",
+ " \n",
" | \n"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"\n",
@@ -24,19 +22,19 @@
" \n",
" | "
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"# Prompt and response projects with MAL and Ground Truth\n",
"\n",
"This notebook is meant to showcase how to generate prompts and responses to fine-tune large language models (LLMs) using MAL and Ground truth"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Annotation payload types\n",
@@ -51,10 +49,10 @@
"- JSON\n",
" - Skips formatting annotation payload in the Labelbox Python annotation type.\n",
" - Supports any levels of nested classification (radio, checklist, or free-form text) under a tool or classification annotation."
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Label Import Types\n",
@@ -63,47 +61,57 @@
"\n",
"- [Model-assisted labeling (MAL)](https://docs.labelbox.com/docs/model-assisted-labeling) allows you to import computer-generated predictions and simple annotations created outside of Labelbox as pre-labels on an asset.\n",
"- [Ground truth](hhttps://docs.labelbox.com/docs/import-ground-truth) allows you to bulk import ground truth annotations from an external or third-party labeling system into Labelbox _Annotate_. Using the label import API to import external data can consolidate and migrate all annotations into Labelbox as a single source of truth."
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Set up "
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "%pip install -q \"labelbox[data]\"",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "%pip install -q --upgrade \"labelbox[data]\""
+ ]
},
{
- "metadata": {},
- "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "import labelbox as lb\n",
+ "import labelbox.types as lb_types\n",
+ "import time\n",
+ "import uuid"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Replace with your API key\n",
"\n",
"Replace the value of `API_KEY` with a valid [API key]([ref:create-api-key](https://docs.labelbox.com/reference/create-api-key)) to connect to the Labelbox client."
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "API_KEY = None\nclient = lb.Client(api_key=API_KEY)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "API_KEY = None\n",
+ "client = lb.Client(api_key=API_KEY)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Supported Annotations\n",
@@ -122,94 +130,206 @@
"- Response creation projects\n",
" - Radio\n",
" - Checklist"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Prompt"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Prompt text"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "prompt_annotation = lb_types.PromptClassificationAnnotation(\n name=\"Follow the prompt and select answers\",\n value=lb_types.PromptText(answer=\"This is an example of a prompt\"),\n)\n\nprompt_annotation_ndjson = {\n \"name\": \"Follow the prompt and select answers\",\n \"answer\": \"This is an example of a prompt\",\n}",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "prompt_annotation = lb_types.PromptClassificationAnnotation(\n",
+ " name=\"prompt text\",\n",
+ " value=lb_types.PromptText(answer=\"This is an example of a prompt\"),\n",
+ ")\n",
+ "\n",
+ "prompt_annotation_ndjson = {\n",
+ " \"name\": \"prompt text\",\n",
+ " \"answer\": \"This is an example of a prompt\",\n",
+ "}"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Responses"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Radio (single-choice)"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "response_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"response radio feature\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\nresponse_radio_annotation_ndjson = {\n \"name\": \"response radio feature\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "response_radio_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"response radio feature\",\n",
+ " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n",
+ " name=\"first_radio_answer\")),\n",
+ ")\n",
+ "\n",
+ "response_radio_annotation_ndjson = {\n",
+ " \"name\": \"response radio feature\",\n",
+ " \"answer\": {\n",
+ " \"name\": \"first_radio_answer\"\n",
+ " },\n",
+ "}"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Checklist (multi-choice)"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "response_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"response checklist feature\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"option_1\"),\n lb_types.ClassificationAnswer(name=\"option_2\"),\n ]),\n)\n\nresponse_checklist_annotation_ndjson = {\n \"name\": \"response checklist feature\",\n \"answer\": [{\n \"name\": \"option_1\"\n }, {\n \"name\": \"option_2\"\n }],\n}",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "response_checklist_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"response checklist feature\",\n",
+ " value=lb_types.Checklist(answer=[\n",
+ " lb_types.ClassificationAnswer(name=\"option_1\"),\n",
+ " lb_types.ClassificationAnswer(name=\"option_2\"),\n",
+ " ]),\n",
+ ")\n",
+ "\n",
+ "response_checklist_annotation_ndjson = {\n",
+ " \"name\": \"response checklist feature\",\n",
+ " \"answer\": [{\n",
+ " \"name\": \"option_1\"\n",
+ " }, {\n",
+ " \"name\": \"option_2\"\n",
+ " }],\n",
+ "}"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Response text"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "response_text_annotation = lb_types.ClassificationAnnotation(\n name=\"Provide a reason for your choice\",\n value=lb_types.Text(answer=\"This is an example of a response text\"),\n)\n\nresponse_text_annotation_ndjson = {\n \"name\": \"Provide a reason for your choice\",\n \"answer\": \"This is an example of a response text\",\n}",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "response_text_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"response text\",\n",
+ " value=lb_types.Text(answer=\"This is an example of a response text\"),\n",
+ ")\n",
+ "\n",
+ "response_text_annotation_ndjson = {\n",
+ " \"name\": \"response text\",\n",
+ " \"answer\": \"This is an example of a response text\",\n",
+ "}"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Nested classifications"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "nested_response_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_response_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_response_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_response_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_response_radio_annotation_ndjson = {\n \"name\":\n \"nested_radio_question\",\n \"answer\": [{\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n }],\n}\n\nnested_response_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
- },
- {
+ "source": [
+ "nested_response_radio_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"nested_response_radio_question\",\n",
+ " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n",
+ " name=\"first_radio_answer\",\n",
+ " classifications=[\n",
+ " lb_types.ClassificationAnnotation(\n",
+ " name=\"sub_radio_question\",\n",
+ " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n",
+ " name=\"first_sub_radio_answer\")),\n",
+ " )\n",
+ " ],\n",
+ " )),\n",
+ ")\n",
+ "\n",
+ "nested_response_checklist_annotation = lb_types.ClassificationAnnotation(\n",
+ " name=\"nested_response_checklist_question\",\n",
+ " value=lb_types.Checklist(answer=[\n",
+ " lb_types.ClassificationAnswer(\n",
+ " name=\"first_checklist_answer\",\n",
+ " classifications=[\n",
+ " lb_types.ClassificationAnnotation(\n",
+ " name=\"sub_checklist_question\",\n",
+ " value=lb_types.Checklist(answer=[\n",
+ " lb_types.ClassificationAnswer(\n",
+ " name=\"first_sub_checklist_answer\")\n",
+ " ]),\n",
+ " )\n",
+ " ],\n",
+ " )\n",
+ " ]),\n",
+ ")\n",
+ "\n",
+ "nested_response_radio_annotation_ndjson = {\n",
+ " \"name\":\n",
+ " \"nested_response_radio_question\",\n",
+ " \"answer\": [{\n",
+ " \"name\":\n",
+ " \"first_radio_answer\",\n",
+ " \"classifications\": [{\n",
+ " \"name\": \"sub_radio_question\",\n",
+ " \"answer\": {\n",
+ " \"name\": \"first_sub_radio_answer\"\n",
+ " },\n",
+ " }],\n",
+ " }],\n",
+ "}\n",
+ "\n",
+ "nested_response_checklist_annotation_ndjson = {\n",
+ " \"name\":\n",
+ " \"nested_response_checklist_question\",\n",
+ " \"answer\": [{\n",
+ " \"name\":\n",
+ " \"first_checklist_answer\",\n",
+ " \"classifications\": [{\n",
+ " \"name\": \"sub_checklist_question\",\n",
+ " \"answer\": {\n",
+ " \"name\": \"first_sub_checklist_answer\"\n",
+ " },\n",
+ " }],\n",
+ " }],\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Create a project and data rows using the Labelbox UI\n",
@@ -217,26 +337,33 @@
"Each type of the prompt and response generation project requires different setup. See [prompt and response project](https://docs.labelbox.com/reference/prompt-and-response-projects) for more details on the differences.\n",
"\n",
"In this tutorial, we will show how to import annotations for a prompt and response creation (humans generate prompts and responses) project. The process is also similar for prompt creation (humans generate prompts) and response creation (humans generate responses to uploaded prompts) projects. See [import prompt and response annotations](https://docs.labelbox.com/reference/import-prompt-and-response-annotations) for a tutorial and more examples on other project types."
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Prompt response and prompt creation\n",
"\n",
- "A prompts and responses creation project automatically generates empty data rows upon creation. You will then need to obtain either the `global_keys` or `data_row_ids` attached to the generated data rows by exporting them from the created project."
- ],
- "cell_type": "markdown"
+ "A prompts and responses creation project automatically generates empty data rows upon creation."
+ ]
},
{
- "metadata": {},
- "source": "prompt_response_project = client.create_model_evaluation_project(\n name=\"Demo prompt response project\",\n media_type=lb.MediaType.LLMPromptResponseCreation,\n dataset_name=\"Demo prompt response dataset\",\n data_row_count=1,\n)\n\nexport_task = prompt_response_project.export()\nexport_task.wait_till_done()\n\n# Check export for any errors\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nstream = export_task.get_buffered_stream()\n\n# Obtain global keys to be used later on\nglobal_keys = [dr.json[\"data_row\"][\"global_key\"] for dr in stream]",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "prompt_response_project = client.create_prompt_response_generation_project(\n",
+ " name=\"Demo prompt response project\",\n",
+ " media_type=lb.MediaType.LLMPromptResponseCreation,\n",
+ " dataset_name=\"Demo prompt response dataset\",\n",
+ " data_row_count=1,\n",
+ ")"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Set up ontology\n",
@@ -246,79 +373,270 @@
"For example, if you provide a name `annotation_name` for your created annotation, you need to name the bounding box tool as `anotations_name` when setting up your ontology. The same alignment must hold true for the other tools and classifications that you create in the ontology.\n",
"\n",
"This example shows how to create an ontology containing all supported by prompt and response projects [annotation types](#supported-annotations)."
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "ontology_builder = lb.OntologyBuilder(\n tools=[],\n classifications=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.PROMPT,\n name=\"prompt text\",\n character_min=1, # Minimum character count of prompt field (optional)\n character_max=\n 20, # Maximum character count of prompt field (optional)\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n name=\"response checklist feature\",\n options=[\n lb.ResponseOption(value=\"option_1\", label=\"option_1\"),\n lb.ResponseOption(value=\"option_2\", label=\"option_2\"),\n ],\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n name=\"response radio feature\",\n options=[\n lb.ResponseOption(value=\"first_radio_answer\"),\n lb.ResponseOption(value=\"second_radio_answer\"),\n ],\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_TEXT,\n name=\"response text\",\n character_min=\n 1, # Minimum character count of response text field (optional)\n character_max=\n 20, # Maximum character count of response text field (optional)\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n name=\"nested_response_radio_question\",\n options=[\n lb.ResponseOption(\n \"first_radio_answer\",\n options=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.\n RESPONSE_RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.ResponseOption(\"first_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n name=\"nested_response_checklist_question\",\n options=[\n lb.ResponseOption(\n \"first_checklist_answer\",\n options=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.\n RESPONSE_CHECKLIST,\n name=\"sub_checklist_question\",\n options=[\n lb.ResponseOption(\"first_sub_checklist_answer\")\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\n# Create ontology\nontology = client.create_ontology(\n \"Prompt and response ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.LLMPromptResponseCreation,\n)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "ontology_builder = lb.OntologyBuilder(\n",
+ " tools=[],\n",
+ " classifications=[\n",
+ " lb.PromptResponseClassification(\n",
+ " class_type=lb.PromptResponseClassification.Type.PROMPT,\n",
+ " name=\"prompt text\",\n",
+ " character_min=1, # Minimum character count of prompt field (optional)\n",
+ " character_max=\n",
+ " 50, # Maximum character count of prompt field (optional)\n",
+ " ),\n",
+ " lb.PromptResponseClassification(\n",
+ " class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n",
+ " name=\"response checklist feature\",\n",
+ " options=[\n",
+ " lb.ResponseOption(value=\"option_1\", label=\"option_1\"),\n",
+ " lb.ResponseOption(value=\"option_2\", label=\"option_2\"),\n",
+ " ],\n",
+ " ),\n",
+ " lb.PromptResponseClassification(\n",
+ " class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n",
+ " name=\"response radio feature\",\n",
+ " options=[\n",
+ " lb.ResponseOption(value=\"first_radio_answer\"),\n",
+ " lb.ResponseOption(value=\"second_radio_answer\"),\n",
+ " ],\n",
+ " ),\n",
+ " lb.PromptResponseClassification(\n",
+ " class_type=lb.PromptResponseClassification.Type.RESPONSE_TEXT,\n",
+ " name=\"response text\",\n",
+ " character_min=\n",
+ " 1, # Minimum character count of response text field (optional)\n",
+ " character_max=\n",
+ " 50, # Maximum character count of response text field (optional)\n",
+ " ),\n",
+ " lb.PromptResponseClassification(\n",
+ " class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n",
+ " name=\"nested_response_radio_question\",\n",
+ " options=[\n",
+ " lb.ResponseOption(\n",
+ " \"first_radio_answer\",\n",
+ " options=[\n",
+ " lb.PromptResponseClassification(\n",
+ " class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n",
+ " name=\"sub_radio_question\",\n",
+ " options=[\n",
+ " lb.ResponseOption(\"first_sub_radio_answer\")\n",
+ " ],\n",
+ " )\n",
+ " ],\n",
+ " )\n",
+ " ],\n",
+ " ),\n",
+ " lb.PromptResponseClassification(\n",
+ " class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n",
+ " name=\"nested_response_checklist_question\",\n",
+ " options=[\n",
+ " lb.ResponseOption(\n",
+ " \"first_checklist_answer\",\n",
+ " options=[\n",
+ " lb.PromptResponseClassification(\n",
+ " class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n",
+ " name=\"sub_checklist_question\",\n",
+ " options=[\n",
+ " lb.ResponseOption(\"first_sub_checklist_answer\")\n",
+ " ],\n",
+ " )\n",
+ " ],\n",
+ " )\n",
+ " ],\n",
+ " ),\n",
+ " ],\n",
+ ")\n",
+ "\n",
+ "# Create ontology\n",
+ "ontology = client.create_ontology(\n",
+ " \"Prompt and response ontology\",\n",
+ " ontology_builder.asdict(),\n",
+ " media_type=lb.MediaType.LLMPromptResponseCreation,\n",
+ ")\n",
+ "\n",
+ "# Attach ontology to project\n",
+ "prompt_response_project.connect_ontology(ontology)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
- "## Step 3: Create the annotations payload\n",
+ "## Step 3: Export for `global_keys`\n",
"\n",
- "For prelabeled (model-assisted labeling) scenarios, pass your payload as the value of the `predictions` parameter. For ground truths, pass the payload to the `labels` parameter."
- ],
- "cell_type": "markdown"
+ " You will then need to obtain either the `global_keys` or `data_row_ids` attached to the generated data rows by exporting them from the created project. Since the generation of data rows is an async process you will need to wait for the project data rows to be completed before exporting."
+ ]
},
{
- "metadata": {},
- "source": "# Python annotation objects\nlabel = []\nannotations = [\n prompt_annotation,\n response_radio_annotation,\n response_checklist_annotation,\n response_text_annotation,\n nested_response_radio_annotation,\n nested_response_checklist_annotation,\n]\nlabel.append(\n lb_types.Label(data={\"global_key\": global_keys[0]},\n annotations=annotations))\n\n# NDJSON\nlabel_ndjson = []\nannotations = [\n prompt_annotation_ndjson,\n response_radio_annotation_ndjson,\n response_checklist_annotation_ndjson,\n response_text_annotation_ndjson,\n nested_response_radio_annotation_ndjson,\n nested_response_checklist_annotation_ndjson,\n]\nfor annotation in annotations:\n annotation.update({\n \"dataRow\": {\n \"globalKey\": global_keys[0]\n },\n })\n label_ndjson.append(annotation)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "time.sleep(20)\n",
+ "\n",
+ "export_task = prompt_response_project.export()\n",
+ "export_task.wait_till_done()\n",
+ "\n",
+ "# Check export for any errors\n",
+ "if export_task.has_errors():\n",
+ " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n",
+ " stream_handler=lambda error: print(error))\n",
+ "\n",
+ "stream = export_task.get_buffered_stream()\n",
+ "\n",
+ "# Obtain global keys to be used later on\n",
+ "global_keys = [dr.json[\"data_row\"][\"global_key\"] for dr in stream]"
+ ]
},
{
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 4: Create the annotations payload\n",
+ "\n",
+ "For prelabeled (model-assisted labeling) scenarios, pass your payload as the value of the `predictions` parameter. For ground truths, pass the payload to the `labels` parameter."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Python annotation objects\n",
+ "label = []\n",
+ "annotations = [\n",
+ " prompt_annotation,\n",
+ " response_radio_annotation,\n",
+ " response_checklist_annotation,\n",
+ " response_text_annotation,\n",
+ " nested_response_radio_annotation,\n",
+ " nested_response_checklist_annotation,\n",
+ "]\n",
+ "label.append(\n",
+ " lb_types.Label(data={\"global_key\": global_keys[0]},\n",
+ " annotations=annotations))\n",
+ "\n",
+ "# NDJSON\n",
+ "label_ndjson = []\n",
+ "annotations = [\n",
+ " prompt_annotation_ndjson,\n",
+ " response_radio_annotation_ndjson,\n",
+ " response_checklist_annotation_ndjson,\n",
+ " response_text_annotation_ndjson,\n",
+ " nested_response_radio_annotation_ndjson,\n",
+ " nested_response_checklist_annotation_ndjson,\n",
+ "]\n",
+ "for annotation in annotations:\n",
+ " annotation.update({\n",
+ " \"dataRow\": {\n",
+ " \"globalKey\": global_keys[0]\n",
+ " },\n",
+ " })\n",
+ " label_ndjson.append(annotation)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Option A: Upload as [prelabels (model assisted labeling)](doc:model-assisted-labeling)\n",
"\n",
"This option is helpful for speeding up the initial labeling process and reducing the manual labeling workload for high-volume datasets."
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=prompt_response_project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "upload_job = lb.MALPredictionImport.create_from_objects(\n",
+ " client=client,\n",
+ " project_id=prompt_response_project.uid,\n",
+ " name=f\"mal_job-{str(uuid.uuid4())}\",\n",
+ " predictions=label,\n",
+ ")\n",
+ "\n",
+ "upload_job.wait_until_done()\n",
+ "print(\"Errors:\", upload_job.errors)\n",
+ "print(\"Status of uploads: \", upload_job.statuses)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Option B: Upload to a labeling project as [ground truth](doc:import-ground-truth)\n",
"\n",
"This option is helpful for loading high-confidence labels from another platform or previous projects that just need review rather than manual labeling effort."
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "upload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=prompt_response_project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label_ndjson,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "upload_job = lb.LabelImport.create_from_objects(\n",
+ " client=client,\n",
+ " project_id=prompt_response_project.uid,\n",
+ " name=\"label_import_job\" + str(uuid.uuid4()),\n",
+ " labels=label_ndjson,\n",
+ ")\n",
+ "\n",
+ "upload_job.wait_until_done()\n",
+ "print(\"Errors:\", upload_job.errors)\n",
+ "print(\"Status of uploads: \", upload_job.statuses)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Clean up\n",
"\n",
"Uncomment and run the cell below to optionally delete Labelbox objects created"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# project.delete()\n# client.delete_unused_ontology(ontology.uid)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# project.delete()\n",
+ "# client.delete_unused_ontology(ontology.uid)"
+ ]
}
- ]
-}
\ No newline at end of file
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
From 8561bca3df161e7cba16db76dd0a09b820d45636 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Tue, 30 Jul 2024 01:30:06 +0000
Subject: [PATCH 2/2] :art: Cleaned
---
annotation_import/prompt_response.ipynb | 506 +++++-------------------
1 file changed, 102 insertions(+), 404 deletions(-)
diff --git a/annotation_import/prompt_response.ipynb b/annotation_import/prompt_response.ipynb
index 38965ae..4694c70 100644
--- a/annotation_import/prompt_response.ipynb
+++ b/annotation_import/prompt_response.ipynb
@@ -1,16 +1,18 @@
{
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "metadata": {},
"cells": [
{
- "cell_type": "markdown",
"metadata": {},
"source": [
- "\n",
- " \n",
+ " | ",
+ " ",
" | \n"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"\n",
@@ -22,19 +24,19 @@
" \n",
" | "
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"# Prompt and response projects with MAL and Ground Truth\n",
"\n",
"This notebook is meant to showcase how to generate prompts and responses to fine-tune large language models (LLMs) using MAL and Ground truth"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"## Annotation payload types\n",
@@ -49,10 +51,10 @@
"- JSON\n",
" - Skips formatting annotation payload in the Labelbox Python annotation type.\n",
" - Supports any levels of nested classification (radio, checklist, or free-form text) under a tool or classification annotation."
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"## Label Import Types\n",
@@ -61,57 +63,47 @@
"\n",
"- [Model-assisted labeling (MAL)](https://docs.labelbox.com/docs/model-assisted-labeling) allows you to import computer-generated predictions and simple annotations created outside of Labelbox as pre-labels on an asset.\n",
"- [Ground truth](hhttps://docs.labelbox.com/docs/import-ground-truth) allows you to bulk import ground truth annotations from an external or third-party labeling system into Labelbox _Annotate_. Using the label import API to import external data can consolidate and migrate all annotations into Labelbox as a single source of truth."
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"## Set up "
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "%pip install -q --upgrade \"labelbox[data]\"",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "%pip install -q --upgrade \"labelbox[data]\""
- ]
+ "execution_count": null
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport time\nimport uuid",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "import labelbox as lb\n",
- "import labelbox.types as lb_types\n",
- "import time\n",
- "import uuid"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"### Replace with your API key\n",
"\n",
"Replace the value of `API_KEY` with a valid [API key]([ref:create-api-key](https://docs.labelbox.com/reference/create-api-key)) to connect to the Labelbox client."
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "API_KEY = None\nclient = lb.Client(api_key=API_KEY)",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "API_KEY = None\n",
- "client = lb.Client(api_key=API_KEY)"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"## Supported Annotations\n",
@@ -130,206 +122,94 @@
"- Response creation projects\n",
" - Radio\n",
" - Checklist"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"### Prompt"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"#### Prompt text"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "prompt_annotation = lb_types.PromptClassificationAnnotation(\n name=\"prompt text\",\n value=lb_types.PromptText(answer=\"This is an example of a prompt\"),\n)\n\nprompt_annotation_ndjson = {\n \"name\": \"prompt text\",\n \"answer\": \"This is an example of a prompt\",\n}",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "prompt_annotation = lb_types.PromptClassificationAnnotation(\n",
- " name=\"prompt text\",\n",
- " value=lb_types.PromptText(answer=\"This is an example of a prompt\"),\n",
- ")\n",
- "\n",
- "prompt_annotation_ndjson = {\n",
- " \"name\": \"prompt text\",\n",
- " \"answer\": \"This is an example of a prompt\",\n",
- "}"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"### Responses"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"#### Radio (single-choice)"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "response_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"response radio feature\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\nresponse_radio_annotation_ndjson = {\n \"name\": \"response radio feature\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "response_radio_annotation = lb_types.ClassificationAnnotation(\n",
- " name=\"response radio feature\",\n",
- " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n",
- " name=\"first_radio_answer\")),\n",
- ")\n",
- "\n",
- "response_radio_annotation_ndjson = {\n",
- " \"name\": \"response radio feature\",\n",
- " \"answer\": {\n",
- " \"name\": \"first_radio_answer\"\n",
- " },\n",
- "}"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"#### Checklist (multi-choice)"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "response_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"response checklist feature\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"option_1\"),\n lb_types.ClassificationAnswer(name=\"option_2\"),\n ]),\n)\n\nresponse_checklist_annotation_ndjson = {\n \"name\": \"response checklist feature\",\n \"answer\": [{\n \"name\": \"option_1\"\n }, {\n \"name\": \"option_2\"\n }],\n}",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "response_checklist_annotation = lb_types.ClassificationAnnotation(\n",
- " name=\"response checklist feature\",\n",
- " value=lb_types.Checklist(answer=[\n",
- " lb_types.ClassificationAnswer(name=\"option_1\"),\n",
- " lb_types.ClassificationAnswer(name=\"option_2\"),\n",
- " ]),\n",
- ")\n",
- "\n",
- "response_checklist_annotation_ndjson = {\n",
- " \"name\": \"response checklist feature\",\n",
- " \"answer\": [{\n",
- " \"name\": \"option_1\"\n",
- " }, {\n",
- " \"name\": \"option_2\"\n",
- " }],\n",
- "}"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"#### Response text"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "response_text_annotation = lb_types.ClassificationAnnotation(\n name=\"response text\",\n value=lb_types.Text(answer=\"This is an example of a response text\"),\n)\n\nresponse_text_annotation_ndjson = {\n \"name\": \"response text\",\n \"answer\": \"This is an example of a response text\",\n}",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "response_text_annotation = lb_types.ClassificationAnnotation(\n",
- " name=\"response text\",\n",
- " value=lb_types.Text(answer=\"This is an example of a response text\"),\n",
- ")\n",
- "\n",
- "response_text_annotation_ndjson = {\n",
- " \"name\": \"response text\",\n",
- " \"answer\": \"This is an example of a response text\",\n",
- "}"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"#### Nested classifications"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "nested_response_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_response_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_response_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_response_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_response_radio_annotation_ndjson = {\n \"name\":\n \"nested_response_radio_question\",\n \"answer\": [{\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n }],\n}\n\nnested_response_checklist_annotation_ndjson = {\n \"name\":\n \"nested_response_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "nested_response_radio_annotation = lb_types.ClassificationAnnotation(\n",
- " name=\"nested_response_radio_question\",\n",
- " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n",
- " name=\"first_radio_answer\",\n",
- " classifications=[\n",
- " lb_types.ClassificationAnnotation(\n",
- " name=\"sub_radio_question\",\n",
- " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n",
- " name=\"first_sub_radio_answer\")),\n",
- " )\n",
- " ],\n",
- " )),\n",
- ")\n",
- "\n",
- "nested_response_checklist_annotation = lb_types.ClassificationAnnotation(\n",
- " name=\"nested_response_checklist_question\",\n",
- " value=lb_types.Checklist(answer=[\n",
- " lb_types.ClassificationAnswer(\n",
- " name=\"first_checklist_answer\",\n",
- " classifications=[\n",
- " lb_types.ClassificationAnnotation(\n",
- " name=\"sub_checklist_question\",\n",
- " value=lb_types.Checklist(answer=[\n",
- " lb_types.ClassificationAnswer(\n",
- " name=\"first_sub_checklist_answer\")\n",
- " ]),\n",
- " )\n",
- " ],\n",
- " )\n",
- " ]),\n",
- ")\n",
- "\n",
- "nested_response_radio_annotation_ndjson = {\n",
- " \"name\":\n",
- " \"nested_response_radio_question\",\n",
- " \"answer\": [{\n",
- " \"name\":\n",
- " \"first_radio_answer\",\n",
- " \"classifications\": [{\n",
- " \"name\": \"sub_radio_question\",\n",
- " \"answer\": {\n",
- " \"name\": \"first_sub_radio_answer\"\n",
- " },\n",
- " }],\n",
- " }],\n",
- "}\n",
- "\n",
- "nested_response_checklist_annotation_ndjson = {\n",
- " \"name\":\n",
- " \"nested_response_checklist_question\",\n",
- " \"answer\": [{\n",
- " \"name\":\n",
- " \"first_checklist_answer\",\n",
- " \"classifications\": [{\n",
- " \"name\": \"sub_checklist_question\",\n",
- " \"answer\": {\n",
- " \"name\": \"first_sub_checklist_answer\"\n",
- " },\n",
- " }],\n",
- " }],\n",
- "}"
- ]
- },
- {
- "cell_type": "markdown",
+ "execution_count": null
+ },
+ {
"metadata": {},
"source": [
"## Step 1: Create a project and data rows using the Labelbox UI\n",
@@ -337,33 +217,26 @@
"Each type of the prompt and response generation project requires different setup. See [prompt and response project](https://docs.labelbox.com/reference/prompt-and-response-projects) for more details on the differences.\n",
"\n",
"In this tutorial, we will show how to import annotations for a prompt and response creation (humans generate prompts and responses) project. The process is also similar for prompt creation (humans generate prompts) and response creation (humans generate responses to uploaded prompts) projects. See [import prompt and response annotations](https://docs.labelbox.com/reference/import-prompt-and-response-annotations) for a tutorial and more examples on other project types."
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"### Prompt response and prompt creation\n",
"\n",
"A prompts and responses creation project automatically generates empty data rows upon creation."
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "prompt_response_project = client.create_prompt_response_generation_project(\n name=\"Demo prompt response project\",\n media_type=lb.MediaType.LLMPromptResponseCreation,\n dataset_name=\"Demo prompt response dataset\",\n data_row_count=1,\n)",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "prompt_response_project = client.create_prompt_response_generation_project(\n",
- " name=\"Demo prompt response project\",\n",
- " media_type=lb.MediaType.LLMPromptResponseCreation,\n",
- " dataset_name=\"Demo prompt response dataset\",\n",
- " data_row_count=1,\n",
- ")"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Set up ontology\n",
@@ -373,270 +246,95 @@
"For example, if you provide a name `annotation_name` for your created annotation, you need to name the bounding box tool as `anotations_name` when setting up your ontology. The same alignment must hold true for the other tools and classifications that you create in the ontology.\n",
"\n",
"This example shows how to create an ontology containing all supported by prompt and response projects [annotation types](#supported-annotations)."
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "ontology_builder = lb.OntologyBuilder(\n tools=[],\n classifications=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.PROMPT,\n name=\"prompt text\",\n character_min=1, # Minimum character count of prompt field (optional)\n character_max=\n 50, # Maximum character count of prompt field (optional)\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n name=\"response checklist feature\",\n options=[\n lb.ResponseOption(value=\"option_1\", label=\"option_1\"),\n lb.ResponseOption(value=\"option_2\", label=\"option_2\"),\n ],\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n name=\"response radio feature\",\n options=[\n lb.ResponseOption(value=\"first_radio_answer\"),\n lb.ResponseOption(value=\"second_radio_answer\"),\n ],\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_TEXT,\n name=\"response text\",\n character_min=\n 1, # Minimum character count of response text field (optional)\n character_max=\n 50, # Maximum character count of response text field (optional)\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n name=\"nested_response_radio_question\",\n options=[\n lb.ResponseOption(\n \"first_radio_answer\",\n options=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.\n RESPONSE_RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.ResponseOption(\"first_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n name=\"nested_response_checklist_question\",\n options=[\n lb.ResponseOption(\n \"first_checklist_answer\",\n options=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.\n RESPONSE_CHECKLIST,\n name=\"sub_checklist_question\",\n options=[\n lb.ResponseOption(\"first_sub_checklist_answer\")\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\n# Create ontology\nontology = client.create_ontology(\n \"Prompt and response ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.LLMPromptResponseCreation,\n)\n\n# Attach ontology to project\nprompt_response_project.connect_ontology(ontology)",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "ontology_builder = lb.OntologyBuilder(\n",
- " tools=[],\n",
- " classifications=[\n",
- " lb.PromptResponseClassification(\n",
- " class_type=lb.PromptResponseClassification.Type.PROMPT,\n",
- " name=\"prompt text\",\n",
- " character_min=1, # Minimum character count of prompt field (optional)\n",
- " character_max=\n",
- " 50, # Maximum character count of prompt field (optional)\n",
- " ),\n",
- " lb.PromptResponseClassification(\n",
- " class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n",
- " name=\"response checklist feature\",\n",
- " options=[\n",
- " lb.ResponseOption(value=\"option_1\", label=\"option_1\"),\n",
- " lb.ResponseOption(value=\"option_2\", label=\"option_2\"),\n",
- " ],\n",
- " ),\n",
- " lb.PromptResponseClassification(\n",
- " class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n",
- " name=\"response radio feature\",\n",
- " options=[\n",
- " lb.ResponseOption(value=\"first_radio_answer\"),\n",
- " lb.ResponseOption(value=\"second_radio_answer\"),\n",
- " ],\n",
- " ),\n",
- " lb.PromptResponseClassification(\n",
- " class_type=lb.PromptResponseClassification.Type.RESPONSE_TEXT,\n",
- " name=\"response text\",\n",
- " character_min=\n",
- " 1, # Minimum character count of response text field (optional)\n",
- " character_max=\n",
- " 50, # Maximum character count of response text field (optional)\n",
- " ),\n",
- " lb.PromptResponseClassification(\n",
- " class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n",
- " name=\"nested_response_radio_question\",\n",
- " options=[\n",
- " lb.ResponseOption(\n",
- " \"first_radio_answer\",\n",
- " options=[\n",
- " lb.PromptResponseClassification(\n",
- " class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n",
- " name=\"sub_radio_question\",\n",
- " options=[\n",
- " lb.ResponseOption(\"first_sub_radio_answer\")\n",
- " ],\n",
- " )\n",
- " ],\n",
- " )\n",
- " ],\n",
- " ),\n",
- " lb.PromptResponseClassification(\n",
- " class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n",
- " name=\"nested_response_checklist_question\",\n",
- " options=[\n",
- " lb.ResponseOption(\n",
- " \"first_checklist_answer\",\n",
- " options=[\n",
- " lb.PromptResponseClassification(\n",
- " class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n",
- " name=\"sub_checklist_question\",\n",
- " options=[\n",
- " lb.ResponseOption(\"first_sub_checklist_answer\")\n",
- " ],\n",
- " )\n",
- " ],\n",
- " )\n",
- " ],\n",
- " ),\n",
- " ],\n",
- ")\n",
- "\n",
- "# Create ontology\n",
- "ontology = client.create_ontology(\n",
- " \"Prompt and response ontology\",\n",
- " ontology_builder.asdict(),\n",
- " media_type=lb.MediaType.LLMPromptResponseCreation,\n",
- ")\n",
- "\n",
- "# Attach ontology to project\n",
- "prompt_response_project.connect_ontology(ontology)"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 3: Export for `global_keys`\n",
"\n",
" You will then need to obtain either the `global_keys` or `data_row_ids` attached to the generated data rows by exporting them from the created project. Since the generation of data rows is an async process you will need to wait for the project data rows to be completed before exporting."
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "time.sleep(20)\n\nexport_task = prompt_response_project.export()\nexport_task.wait_till_done()\n\n# Check export for any errors\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nstream = export_task.get_buffered_stream()\n\n# Obtain global keys to be used later on\nglobal_keys = [dr.json[\"data_row\"][\"global_key\"] for dr in stream]",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "time.sleep(20)\n",
- "\n",
- "export_task = prompt_response_project.export()\n",
- "export_task.wait_till_done()\n",
- "\n",
- "# Check export for any errors\n",
- "if export_task.has_errors():\n",
- " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n",
- " stream_handler=lambda error: print(error))\n",
- "\n",
- "stream = export_task.get_buffered_stream()\n",
- "\n",
- "# Obtain global keys to be used later on\n",
- "global_keys = [dr.json[\"data_row\"][\"global_key\"] for dr in stream]"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 4: Create the annotations payload\n",
"\n",
"For prelabeled (model-assisted labeling) scenarios, pass your payload as the value of the `predictions` parameter. For ground truths, pass the payload to the `labels` parameter."
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "# Python annotation objects\nlabel = []\nannotations = [\n prompt_annotation,\n response_radio_annotation,\n response_checklist_annotation,\n response_text_annotation,\n nested_response_radio_annotation,\n nested_response_checklist_annotation,\n]\nlabel.append(\n lb_types.Label(data={\"global_key\": global_keys[0]},\n annotations=annotations))\n\n# NDJSON\nlabel_ndjson = []\nannotations = [\n prompt_annotation_ndjson,\n response_radio_annotation_ndjson,\n response_checklist_annotation_ndjson,\n response_text_annotation_ndjson,\n nested_response_radio_annotation_ndjson,\n nested_response_checklist_annotation_ndjson,\n]\nfor annotation in annotations:\n annotation.update({\n \"dataRow\": {\n \"globalKey\": global_keys[0]\n },\n })\n label_ndjson.append(annotation)",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "# Python annotation objects\n",
- "label = []\n",
- "annotations = [\n",
- " prompt_annotation,\n",
- " response_radio_annotation,\n",
- " response_checklist_annotation,\n",
- " response_text_annotation,\n",
- " nested_response_radio_annotation,\n",
- " nested_response_checklist_annotation,\n",
- "]\n",
- "label.append(\n",
- " lb_types.Label(data={\"global_key\": global_keys[0]},\n",
- " annotations=annotations))\n",
- "\n",
- "# NDJSON\n",
- "label_ndjson = []\n",
- "annotations = [\n",
- " prompt_annotation_ndjson,\n",
- " response_radio_annotation_ndjson,\n",
- " response_checklist_annotation_ndjson,\n",
- " response_text_annotation_ndjson,\n",
- " nested_response_radio_annotation_ndjson,\n",
- " nested_response_checklist_annotation_ndjson,\n",
- "]\n",
- "for annotation in annotations:\n",
- " annotation.update({\n",
- " \"dataRow\": {\n",
- " \"globalKey\": global_keys[0]\n",
- " },\n",
- " })\n",
- " label_ndjson.append(annotation)"
- ]
- },
- {
- "cell_type": "markdown",
+ "execution_count": null
+ },
+ {
"metadata": {},
"source": [
"#### Option A: Upload as [prelabels (model assisted labeling)](doc:model-assisted-labeling)\n",
"\n",
"This option is helpful for speeding up the initial labeling process and reducing the manual labeling workload for high-volume datasets."
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=prompt_response_project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "upload_job = lb.MALPredictionImport.create_from_objects(\n",
- " client=client,\n",
- " project_id=prompt_response_project.uid,\n",
- " name=f\"mal_job-{str(uuid.uuid4())}\",\n",
- " predictions=label,\n",
- ")\n",
- "\n",
- "upload_job.wait_until_done()\n",
- "print(\"Errors:\", upload_job.errors)\n",
- "print(\"Status of uploads: \", upload_job.statuses)"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"#### Option B: Upload to a labeling project as [ground truth](doc:import-ground-truth)\n",
"\n",
"This option is helpful for loading high-confidence labels from another platform or previous projects that just need review rather than manual labeling effort."
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "upload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=prompt_response_project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label_ndjson,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "upload_job = lb.LabelImport.create_from_objects(\n",
- " client=client,\n",
- " project_id=prompt_response_project.uid,\n",
- " name=\"label_import_job\" + str(uuid.uuid4()),\n",
- " labels=label_ndjson,\n",
- ")\n",
- "\n",
- "upload_job.wait_until_done()\n",
- "print(\"Errors:\", upload_job.errors)\n",
- "print(\"Status of uploads: \", upload_job.statuses)"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"## Clean up\n",
"\n",
"Uncomment and run the cell below to optionally delete Labelbox objects created"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "# project.delete()\n# client.delete_unused_ontology(ontology.uid)",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "# project.delete()\n",
- "# client.delete_unused_ontology(ontology.uid)"
- ]
+ "execution_count": null
}
- ],
- "metadata": {
- "kernelspec": {
- "display_name": ".venv",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.4"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
+ ]
+}
\ No newline at end of file