From e51e1cd8c8f265e228010ef711b045106e382a11 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 28 May 2024 10:49:51 -0500 Subject: [PATCH 1/2] changed global keys to use a name with uuid to prevent notebooks from breaking --- examples/annotation_import/audio.ipynb | 356 +++- .../annotation_import/conversational.ipynb | 519 ++++- .../conversational_LLM.ipynb | 565 +++++- examples/annotation_import/dicom.ipynb | 399 +++- examples/annotation_import/html.ipynb | 476 ++++- examples/annotation_import/image.ipynb | 1023 ++++++++-- examples/annotation_import/pdf.ipynb | 1030 ++++++++-- examples/annotation_import/text.ipynb | 519 ++++- examples/annotation_import/tiled.ipynb | 835 ++++++-- examples/annotation_import/video.ipynb | 1205 +++++++++-- examples/basics/data_row_metadata.ipynb | 352 +++- .../custom_metrics_demo.ipynb | 1806 +++++++++++++++-- .../model_predictions_to_project.ipynb | 321 ++- .../conversational_LLM_predictions.ipynb | 732 +++++-- .../conversational_predictions.ipynb | 690 +++++-- .../geospatial_predictions.ipynb | 1041 ++++++++-- .../prediction_upload/html_predictions.ipynb | 604 ++++-- .../prediction_upload/image_predictions.ipynb | 1156 +++++++++-- .../prediction_upload/pdf_predictions.ipynb | 1046 ++++++++-- .../prediction_upload/text_predictions.ipynb | 635 ++++-- .../prediction_upload/video_predictions.ipynb | 1376 +++++++++++-- examples/project_configuration/webhooks.ipynb | 289 ++- 22 files changed, 14388 insertions(+), 2587 deletions(-) diff --git a/examples/annotation_import/audio.ipynb b/examples/annotation_import/audio.ipynb index c798be914..22d25a130 100644 --- a/examples/annotation_import/audio.ipynb +++ b/examples/annotation_import/audio.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", 
"metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Audio Annotation Import\n", @@ -53,111 +51,188 @@ "* Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", "* Label Import - used to provide ground truth labels. These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* For information on what types of annotations are supported per data type, refer to this documentation:\n", " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* Notes:\n", " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport uuid\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import uuid\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for Audio" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "##### Classification free text #####\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"text_audio\",\n value=lb_types.Text(answer=\"free text audio annotation\"),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"text_audio\",\n \"answer\": \"free text audio annotation\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "##### Classification free text #####\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"text_audio\",\n", + " value=lb_types.Text(answer=\"free text audio 
annotation\"),\n", + ")\n", + "\n", + "text_annotation_ndjson = {\n", + " \"name\": \"text_audio\",\n", + " \"answer\": \"free text audio annotation\",\n", + "}" + ] }, { - "metadata": {}, - "source": "##### Checklist Classification #######\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_audio\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_audio\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "##### Checklist Classification #######\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_audio\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"checklist_audio\",\n", + " \"answers\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\"\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "######## Radio Classification ######\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_audio\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nradio_annotation_ndjson = {\n \"name\": \"radio_audio\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######## Radio Classification ######\n", + 
"\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_audio\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\")),\n", + ")\n", + "\n", + "radio_annotation_ndjson = {\n", + " \"name\": \"radio_audio\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create one Labelbox dataset\n\nglobal_key = \"sample-audio-1.mp3\"\n\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"audio_annotation_import_demo_dataset\")\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create one Labelbox dataset\n", + "\n", + "global_key = \"sample-audio-1.mp3\" + str(uuid.uuid4())\n", + "\n", + "asset = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"audio_annotation_import_demo_dataset\")\n", + "task = dataset.create_data_rows([asset])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows: \", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an ontology\n", @@ -165,135 +240,232 @@ "Your project 
should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we create the text annotation, we provided the `name` as `text_audio`. Now, when we setup our ontology, we must ensure that the name of the tool is also `text_audio`. The same alignment must hold true for the other tools and classifications we create in our ontology." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(classifications=[\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"text_audio\"),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_audio\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_audio\",\n", + " options=[\n", + " 
lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + "])\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Ontology Audio Annotations\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Audio,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", "## Step 3: Create a labeling project\n", "Connect the ontology to the labeling project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create Labelbox project\nproject = client.create_project(name=\"audio_project\",\n media_type=lb.MediaType.Audio)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create Labelbox project\n", + "project = client.create_project(name=\"audio_project\",\n", + " media_type=lb.MediaType.Audio)\n", + "\n", + "# Setup your ontology\n", + "project.setup_editor(\n", + " ontology) # Connect your ontology and editor to your project" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send a batch of data rows to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Setup Batches and Ontology\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-audio-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Setup Batches and Ontology\n", + "\n", + "# Create a batch to send to your MAL project\n", + "batch = project.create_batch(\n", + " 
\"first-batch-audio-demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the annotations payload\n", "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label = []\n", + "label.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[text_annotation, checklist_annotation, radio_annotation],\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### NDJSON annotations \n", "Here we create the complete label NDJSON payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1)." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\nfor annotations in [\n text_annotation_ndjson,\n checklist_annotation_ndjson,\n radio_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_ndjson = []\n", + "for annotations in [\n", + " text_annotation_ndjson,\n", + " checklist_annotation_ndjson,\n", + " radio_annotation_ndjson,\n", + "]:\n", + " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_ndjson.append(annotations)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or complete labels" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Assisted Labeling (MAL)\n", "For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload our label using Model-Assisted Labeling\n", + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=f\"mal_job-{str(uuid.uuid4())}\",\n", + " predictions=label,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload label for this data row in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload label for this data row in project\n", + "upload_job = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"label_import_job\" + str(uuid.uuid4()),\n", + " labels=label,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", 
upload_job.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/annotation_import/conversational.ipynb b/examples/annotation_import/conversational.ipynb index d62ae9ee8..1e4c28de1 100644 --- a/examples/annotation_import/conversational.ipynb +++ b/examples/annotation_import/conversational.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Conversational Text Annotation Import\n", @@ -55,125 +53,290 @@ "* Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", "* Label Import - used to provide ground truth labels. 
These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* For information on what types of annotations are supported per data type, refer to this documentation:\n", " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* Notes:\n", " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport uuid\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import uuid\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations 
for conversational text" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# message based classifications\nner_annotation = lb_types.ObjectAnnotation(\n name=\"ner\",\n value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n)\n\nner_annotation_ndjson = {\n \"name\": \"ner\",\n \"location\": {\n \"start\": 0,\n \"end\": 8\n },\n \"messageId\": \"4\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# message based classifications\n", + "ner_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner\",\n", + " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n", + ")\n", + "\n", + "ner_annotation_ndjson = {\n", + " \"name\": \"ner\",\n", + " \"location\": {\n", + " \"start\": 0,\n", + " \"end\": 8\n", + " },\n", + " \"messageId\": \"4\",\n", + "}" + ] }, { - "metadata": {}, - "source": "##### Classification free text #####\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"text_convo\",\n value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n message_id=\"0\",\n)\n\ntext_annotation_ndjson = {\n \"name\": \"text_convo\",\n \"answer\": \"the answer to the text questions right here\",\n \"messageId\": \"0\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "##### Classification free text #####\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"text_convo\",\n", + " value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n", + " message_id=\"0\",\n", + ")\n", + "\n", + "text_annotation_ndjson = {\n", + " \"name\": \"text_convo\",\n", + " \"answer\": \"the answer to the text questions right here\",\n", + " \"messageId\": \"0\",\n", + "}" + ] }, { - "metadata": {}, - "source": "##### Checklist Classification #######\n\nchecklist_annotation = 
lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n message_id=\"2\",\n)\n\nchecklist_annotation_ndjson = {\n \"name\": \"checklist_convo\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n \"messageId\": \"2\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "##### Checklist Classification #######\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_convo\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + " message_id=\"2\",\n", + ")\n", + "\n", + "checklist_annotation_ndjson = {\n", + " \"name\": \"checklist_convo\",\n", + " \"answers\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\"\n", + " },\n", + " ],\n", + " \"messageId\": \"2\",\n", + "}" + ] }, { - "metadata": {}, - "source": "######## Radio Classification ######\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_convo\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n message_id=\"0\",\n)\n\nradio_annotation_ndjson = {\n \"name\": \"radio_convo\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n \"messageId\": \"0\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######## Radio Classification ######\n", + "\n", + "radio_annotation = 
lb_types.ClassificationAnnotation(\n", + " name=\"radio_convo\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + " message_id=\"0\",\n", + ")\n", + "\n", + "radio_annotation_ndjson = {\n", + " \"name\": \"radio_convo\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + " \"messageId\": \"0\",\n", + "}" + ] }, { - "metadata": {}, - "source": "# ############ global nested classifications ###########\n# Message based\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"10\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",)\n ]),\n )\n ],\n )\n ]),\n)\n# Message based\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"messageId\":\n \"10\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}\n# Global\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\n# Global\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}", "cell_type": 
"code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# ############ global nested classifications ###########\n", + "# Message based\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " message_id=\"10\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",)\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "# Message based\n", + "nested_checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"messageId\":\n", + " \"10\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " },\n", + " }],\n", + " }],\n", + "}\n", + "# Global\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "# Global\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": 
\"first_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create one Labelbox dataset\n\nglobal_key = \"conversation-1.json\"\n\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(\n name=\"conversational_annotation_import_demo_dataset\")\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create one Labelbox dataset\n", + "\n", + "global_key = \"conversation-1.json\" + str(uuid.uuid4())\n", + "\n", + "asset = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(\n", + " name=\"conversational_annotation_import_demo_dataset\")\n", + "task = dataset.create_data_rows([asset])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows: \", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an ontology\n", @@ -181,135 +344,283 @@ "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields 
in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we create the text annotation, we provided the `name` as `text_convo`. Now, when we setup our ontology, we must ensure that the name of the tool is also `text_convo`. The same alignment must hold true for the other tools and classifications we create in our ontology." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n scope=lb.Classification.Scope.INDEX,\n name=\"text_convo\",\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n scope=lb.Classification.Scope.INDEX,\n name=\"checklist_convo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_convo\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\"Ontology 
Conversation Annotations\",\n ontology_builder.asdict())", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " tools=[\n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n", + " ],\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", + " scope=lb.Classification.Scope.INDEX,\n", + " name=\"text_convo\",\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " scope=lb.Classification.Scope.INDEX,\n", + " name=\"checklist_convo\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_convo\",\n", + " scope=lb.Classification.Scope.INDEX,\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope=lb.Classification.Scope.INDEX,\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " 
options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\"Ontology Conversation Annotations\",\n", + " ontology_builder.asdict())" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", "## Step 3: Create a labeling project\n", "Connect the ontology to the labeling project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create Labelbox project\nproject = client.create_project(\n name=\"Conversational Text Annotation Import Demo\",\n media_type=lb.MediaType.Conversational,\n)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create Labelbox project\n", + "project = client.create_project(\n", + " name=\"Conversational Text Annotation Import Demo\",\n", + " media_type=lb.MediaType.Conversational,\n", + ")\n", + "\n", + "# Setup your ontology\n", + "project.setup_editor(\n", + " ontology) # Connect your ontology and editor to your project" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send a batch of data rows to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a batch to send to your MAL project\n", + "batch = project.create_batch(\n", + " \"first-batch-convo-demo\", # Each batch in 
a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the annotations payload\n", "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. However,for conversational texts NDJSON is the only supported format. " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n ner_annotation,\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_radio_annotation,\n nested_checklist_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label = []\n", + "label.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " ner_annotation,\n", + " text_annotation,\n", + " checklist_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " nested_checklist_annotation,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "### NDJSON annotations \n", "Here we create the complete label NDJSON payload of annotations only using NDJSON format. 
There is one annotation for each reference to an annotation that we created [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1)." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\nfor annotations in [\n ner_annotation_ndjson,\n text_annotation_ndjson,\n checklist_annotation_ndjson,\n radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n nested_radio_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label_ndjson = []\n", + "for annotations in [\n", + " ner_annotation_ndjson,\n", + " text_annotation_ndjson,\n", + " checklist_annotation_ndjson,\n", + " radio_annotation_ndjson,\n", + " nested_checklist_annotation_ndjson,\n", + " nested_radio_annotation_ndjson,\n", + "]:\n", + " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_ndjson.append(annotations)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or complete labels" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Assisted Labeling (MAL)\n", "For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload our label using Model-Assisted Labeling\n", + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=f\"mal_job-{str(uuid.uuid4())}\",\n", + " predictions=label,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload label for this data row in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload label for this data row in project\n", + "upload_job = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"label_import_job\" + str(uuid.uuid4()),\n", + " labels=label,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", 
upload_job.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/annotation_import/conversational_LLM.ipynb b/examples/annotation_import/conversational_LLM.ipynb index fa1fe02b0..733607bde 100644 --- a/examples/annotation_import/conversational_LLM.ipynb +++ b/examples/annotation_import/conversational_LLM.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,137 +22,270 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# LLM pairwise comparison with Conversational text using MAL and Ground truth\n", "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Set up" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid", "cell_type": "code", + "execution_count": null, + 
"metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "import uuid" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API key" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Supported annotations for conversational text" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Entity " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ner_annotation = lb_types.ObjectAnnotation(\n name=\"ner\",\n value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n)\n\nner_annotation_ndjson = {\n \"name\": \"ner\",\n \"location\": {\n \"start\": 0,\n \"end\": 8\n },\n \"messageId\": \"message-1\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ner_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner\",\n", + " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n", + ")\n", + "\n", + "ner_annotation_ndjson = {\n", + " \"name\": \"ner\",\n", + " \"location\": {\n", + " \"start\": 0,\n", + " \"end\": 8\n", + " },\n", + " \"messageId\": \"message-1\",\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Radio (single-choice)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "radio_annotation = lb_types.ClassificationAnnotation(\n name=\"Choose the best response\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"Response B\")),\n)\n\nradio_annotation_ndjson = 
{\n \"name\": \"Choose the best response\",\n \"answer\": {\n \"name\": \"Response B\"\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"Choose the best response\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"Response B\")),\n", + ")\n", + "\n", + "radio_annotation_ndjson = {\n", + " \"name\": \"Choose the best response\",\n", + " \"answer\": {\n", + " \"name\": \"Response B\"\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Free-form text" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "text_annotation = lb_types.ClassificationAnnotation(\n name=\"Provide a reason for your choice\",\n value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"Provide a reason for your choice\",\n \"answer\": \"This is the more concise answer\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"Provide a reason for your choice\",\n", + " value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n", + ")\n", + "\n", + "text_annotation_ndjson = {\n", + " \"name\": \"Provide a reason for your choice\",\n", + " \"answer\": \"This is the more concise answer\",\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Checklist (multi-choice)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n 
lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n message_id=\"message-1\", # Message specific annotation\n)\n\nchecklist_annotation_ndjson = {\n \"name\": \"checklist_convo\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n \"messageId\": \"message-1\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_convo\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + " message_id=\"message-1\", # Message specific annotation\n", + ")\n", + "\n", + "checklist_annotation_ndjson = {\n", + " \"name\": \"checklist_convo\",\n", + " \"answers\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\"\n", + " },\n", + " ],\n", + " \"messageId\": \"message-1\",\n", + "}" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Nested radio and checklist" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Message based\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"message-1\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n# Message based\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"messageId\":\n 
\"message-1\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n },\n }],\n }],\n}\n# Global\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n# Global\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# Message based\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " message_id=\"message-1\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "# Message based\n", + "nested_checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"messageId\":\n", + " \"message-1\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": 
\"first_sub_checklist_answer\",\n", + " },\n", + " }],\n", + " }],\n", + "}\n", + "# Global\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "# Global\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows with \"modelOutputs\" into Catalog" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n", @@ -168,141 +299,351 @@ " }\n", "]\n", "```\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Example of row_data with model outputs" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "pairwise_shopping_2 = \"\"\"\n {\n \"type\": \"application/vnd.labelbox.conversational\",\n \"version\": 1,\n \"messages\": [\n {\n \"messageId\": \"message-0\",\n \"timestampUsec\": 1530718491,\n \"content\": \"Hi! 
How can I help?\",\n \"user\": {\n \"userId\": \"Bot 002\",\n \"name\": \"Bot\"\n },\n \"align\": \"left\",\n \"canLabel\": false\n },\n {\n \"messageId\": \"message-1\",\n \"timestampUsec\": 1530718503,\n \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n \"user\": {\n \"userId\": \"User 00686\",\n \"name\": \"User\"\n },\n \"align\": \"right\",\n \"canLabel\": true\n }\n\n ],\n \"modelOutputs\": [\n {\n \"title\": \"Response A\",\n \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n },\n {\n \"title\": \"Response B\",\n \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. 
Your satisfaction is important to us!\",\n \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n }\n ]\n}\n\"\"\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "pairwise_shopping_2 = \"\"\"\n", + " {\n", + " \"type\": \"application/vnd.labelbox.conversational\",\n", + " \"version\": 1,\n", + " \"messages\": [\n", + " {\n", + " \"messageId\": \"message-0\",\n", + " \"timestampUsec\": 1530718491,\n", + " \"content\": \"Hi! How can I help?\",\n", + " \"user\": {\n", + " \"userId\": \"Bot 002\",\n", + " \"name\": \"Bot\"\n", + " },\n", + " \"align\": \"left\",\n", + " \"canLabel\": false\n", + " },\n", + " {\n", + " \"messageId\": \"message-1\",\n", + " \"timestampUsec\": 1530718503,\n", + " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n", + " \"user\": {\n", + " \"userId\": \"User 00686\",\n", + " \"name\": \"User\"\n", + " },\n", + " \"align\": \"right\",\n", + " \"canLabel\": true\n", + " }\n", + "\n", + " ],\n", + " \"modelOutputs\": [\n", + " {\n", + " \"title\": \"Response A\",\n", + " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. 
We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n", + " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n", + " },\n", + " {\n", + " \"title\": \"Response B\",\n", + " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n", + " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n", + " }\n", + " ]\n", + "}\n", + "\"\"\"" + ] }, { - "metadata": {}, - "source": "global_key = \"pairwise_shooping_asset\"\n\n# Upload data rows\nconvo_data = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n \"global_key\":\n global_key,\n}\n\n# Create a dataset\ndataset = client.create_dataset(name=\"pairwise_annotation_demo\")\n# Create a datarows\ntask = dataset.create_data_rows([convo_data])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "global_key = \"pairwise_shooping_asset\" + str(uuid.uuid4())\n", + "\n", + "# Upload data rows\n", + "convo_data = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "# Create a dataset\n", + "dataset = client.create_dataset(name=\"pairwise_annotation_demo\")\n", + "# Create a datarows\n", + "task = dataset.create_data_rows([convo_data])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data 
rows:\", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create an ontology with relevant classifications\n\nontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n scope=lb.Classification.Scope.GLOBAL,\n name=\"Choose the best response\",\n options=[\n lb.Option(value=\"Response A\"),\n lb.Option(value=\"Response B\"),\n lb.Option(value=\"Tie\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"Provide a reason for your choice\",\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n scope=lb.Classification.Scope.INDEX,\n name=\"checklist_convo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Pairwise comparison ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Conversational,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, 
"outputs": [], - "execution_count": null + "source": [ + "# Create an ontology with relevant classifications\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " tools=[\n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n", + " ],\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " name=\"Choose the best response\",\n", + " options=[\n", + " lb.Option(value=\"Response A\"),\n", + " lb.Option(value=\"Response B\"),\n", + " lb.Option(value=\"Tie\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"Provide a reason for your choice\",\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " scope=lb.Classification.Scope.INDEX,\n", + " name=\"checklist_convo\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope=lb.Classification.Scope.INDEX,\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " 
),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Pairwise comparison ontology\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Conversational,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a labeling project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create Labelbox project\nproject = client.create_project(\n name=\"Conversational Text Annotation Import Demo (Pairwise comparison)\",\n media_type=lb.MediaType.Conversational,\n)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create Labelbox project\n", + "project = client.create_project(\n", + " name=\"Conversational Text Annotation Import Demo (Pairwise comparison)\",\n", + " media_type=lb.MediaType.Conversational,\n", + ")\n", + "\n", + "# Setup your ontology\n", + "project.setup_editor(\n", + " ontology) # Connect your ontology and editor to your project" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send a batch of data rows to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a batch to send to your project\nbatch = project.create_batch(\n \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a batch to send to your project\n", + "batch = project.create_batch(\n", + " \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n", + " 
global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Python annotation" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n ner_annotation,\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_radio_annotation,\n nested_checklist_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label = []\n", + "label.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " ner_annotation,\n", + " text_annotation,\n", + " checklist_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " nested_checklist_annotation,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "NDJSON annotation" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\nfor annotations in [\n ner_annotation_ndjson,\n text_annotation_ndjson,\n checklist_annotation_ndjson,\n radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n nested_radio_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label_ndjson = []\n", + "for annotations in [\n", + " ner_annotation_ndjson,\n", + " text_annotation_ndjson,\n", + " checklist_annotation_ndjson,\n", 
+ " radio_annotation_ndjson,\n", + " nested_checklist_annotation_ndjson,\n", + " nested_radio_annotation_ndjson,\n", + "]:\n", + " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_ndjson.append(annotations)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6: Upload annotations to a project as pre-labels or complete labels " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Model Assisted Labeling (MAL)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=f\"mal_job-{str(uuid.uuid4())}\",\n", + " predictions=label,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " 
project_id=project.uid,\n", + " name=\"label_import_job\" + str(uuid.uuid4()),\n", + " labels=label,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/annotation_import/dicom.ipynb b/examples/annotation_import/dicom.ipynb index cd4c88497..42a373e18 100644 --- a/examples/annotation_import/dicom.ipynb +++ b/examples/annotation_import/dicom.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# DICOM Annotation Import\n", @@ -43,88 +41,237 @@ " * Free form text classifications\n", " * Radio classifications \n", " * Checklist classifications" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "import uuid" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key \n", "Guides on 
[Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = None\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = None\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for DICOM\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "######## Polyline ########\npolyline_annotation = [\n lb_types.DICOMObjectAnnotation(\n name=\"line_dicom\",\n group_key=lb_types.GroupKey.AXIAL,\n frame=1,\n value=lb_types.Line(points=[\n lb_types.Point(x=10, y=10),\n lb_types.Point(x=200, y=20),\n lb_types.Point(x=250, y=250),\n ]),\n segment_index=0,\n keyframe=True,\n ),\n lb_types.DICOMObjectAnnotation(\n name=\"line_dicom\",\n group_key=lb_types.GroupKey.AXIAL,\n frame=20,\n value=lb_types.Line(points=[\n lb_types.Point(x=10, y=10),\n lb_types.Point(x=200, y=10),\n lb_types.Point(x=300, y=300),\n ]),\n segment_index=1,\n keyframe=True,\n ),\n]\n\npolyline_annotation_ndjson = {\n \"name\":\n \"line_dicom\",\n \"groupKey\":\n \"axial\", # should be 'axial', 'sagittal', or 'coronal'\n \"segments\": [\n {\n \"keyframes\": [{\n \"frame\":\n 1,\n \"line\": [\n {\n \"x\": 10,\n \"y\": 10\n },\n {\n \"x\": 200,\n \"y\": 20\n },\n {\n \"x\": 250,\n \"y\": 250\n },\n ],\n }]\n },\n {\n \"keyframes\": [{\n \"frame\":\n 20,\n \"line\": [\n {\n \"x\": 10,\n \"y\": 10\n },\n {\n \"x\": 200,\n \"y\": 10\n },\n {\n \"x\": 300,\n \"y\": 300\n },\n ],\n }]\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######## Polyline ########\n", + "polyline_annotation = [\n", + " lb_types.DICOMObjectAnnotation(\n", + " name=\"line_dicom\",\n", + " 
group_key=lb_types.GroupKey.AXIAL,\n", + " frame=1,\n", + " value=lb_types.Line(points=[\n", + " lb_types.Point(x=10, y=10),\n", + " lb_types.Point(x=200, y=20),\n", + " lb_types.Point(x=250, y=250),\n", + " ]),\n", + " segment_index=0,\n", + " keyframe=True,\n", + " ),\n", + " lb_types.DICOMObjectAnnotation(\n", + " name=\"line_dicom\",\n", + " group_key=lb_types.GroupKey.AXIAL,\n", + " frame=20,\n", + " value=lb_types.Line(points=[\n", + " lb_types.Point(x=10, y=10),\n", + " lb_types.Point(x=200, y=10),\n", + " lb_types.Point(x=300, y=300),\n", + " ]),\n", + " segment_index=1,\n", + " keyframe=True,\n", + " ),\n", + "]\n", + "\n", + "polyline_annotation_ndjson = {\n", + " \"name\":\n", + " \"line_dicom\",\n", + " \"groupKey\":\n", + " \"axial\", # should be 'axial', 'sagittal', or 'coronal'\n", + " \"segments\": [\n", + " {\n", + " \"keyframes\": [{\n", + " \"frame\":\n", + " 1,\n", + " \"line\": [\n", + " {\n", + " \"x\": 10,\n", + " \"y\": 10\n", + " },\n", + " {\n", + " \"x\": 200,\n", + " \"y\": 20\n", + " },\n", + " {\n", + " \"x\": 250,\n", + " \"y\": 250\n", + " },\n", + " ],\n", + " }]\n", + " },\n", + " {\n", + " \"keyframes\": [{\n", + " \"frame\":\n", + " 20,\n", + " \"line\": [\n", + " {\n", + " \"x\": 10,\n", + " \"y\": 10\n", + " },\n", + " {\n", + " \"x\": 200,\n", + " \"y\": 10\n", + " },\n", + " {\n", + " \"x\": 300,\n", + " \"y\": 300\n", + " },\n", + " ],\n", + " }]\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "######## Segmentation Masks ########\n\nmask_annotation = [\n lb_types.DICOMMaskAnnotation(\n group_key=\"axial\",\n frames=[\n lb_types.MaskFrame(\n index=1,\n instance_uri=\n \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n ),\n lb_types.MaskFrame(\n index=5,\n instance_uri=\n \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n ),\n ],\n instances=[\n lb_types.MaskInstance(color_rgb=(255, 255, 255),\n 
name=\"segmentation_mask_dicom\")\n ],\n )\n]\n\nmask_annotation_ndjson = {\n \"groupKey\": \"axial\",\n \"masks\": {\n \"frames\": [\n {\n \"index\":\n 1,\n \"instanceURI\":\n \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n },\n {\n \"index\":\n 5,\n \"instanceURI\":\n \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n },\n ],\n \"instances\": [{\n \"colorRGB\": (255, 255, 255),\n \"name\": \"segmentation_mask_dicom\"\n }],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######## Segmentation Masks ########\n", + "\n", + "mask_annotation = [\n", + " lb_types.DICOMMaskAnnotation(\n", + " group_key=\"axial\",\n", + " frames=[\n", + " lb_types.MaskFrame(\n", + " index=1,\n", + " instance_uri=\n", + " \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n", + " ),\n", + " lb_types.MaskFrame(\n", + " index=5,\n", + " instance_uri=\n", + " \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n", + " ),\n", + " ],\n", + " instances=[\n", + " lb_types.MaskInstance(color_rgb=(255, 255, 255),\n", + " name=\"segmentation_mask_dicom\")\n", + " ],\n", + " )\n", + "]\n", + "\n", + "mask_annotation_ndjson = {\n", + " \"groupKey\": \"axial\",\n", + " \"masks\": {\n", + " \"frames\": [\n", + " {\n", + " \"index\":\n", + " 1,\n", + " \"instanceURI\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n", + " },\n", + " {\n", + " \"index\":\n", + " 5,\n", + " \"instanceURI\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n", + " },\n", + " ],\n", + " \"instances\": [{\n", + " \"colorRGB\": (255, 255, 255),\n", + " \"name\": \"segmentation_mask_dicom\"\n", + " }],\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": 
[ "## Upload Annotations - putting it all together" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "global_key = \"sample-dicom-1.dcm\"\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-dicom-1.dcm\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"dicom_demo_dataset\")\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors :\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "global_key = \"sample-dicom-1.dcm\" + str(uuid.uuid4())\n", + "asset = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-dicom-1.dcm\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"dicom_demo_dataset\")\n", + "task = dataset.create_data_rows([asset])\n", + "task.wait_till_done()\n", + "print(\"Errors :\", task.errors)\n", + "print(\"Failed data rows:\", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an ontology\n", @@ -134,133 +281,215 @@ "\n", "\n", "[Documentation for reference ](https://docs.labelbox.com/reference/import-text-annotations)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(tools=[\n lb.Tool(\n tool=lb.Tool.Type.RASTER_SEGMENTATION,\n name=\"segmentation_mask_dicom\",\n ),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_dicom\"),\n])\n\nontology = client.create_ontology(\n \"Ontology DICOM Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Dicom,\n)", "cell_type": "code", + "execution_count": null, + 
"metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(tools=[\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.RASTER_SEGMENTATION,\n", + " name=\"segmentation_mask_dicom\",\n", + " ),\n", + " lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_dicom\"),\n", + "])\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Ontology DICOM Annotations\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Dicom,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Create a labeling project \n", "Connect the ontology to the labeling project." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"dicom_project_demo\",\n media_type=lb.MediaType.Dicom)\n\n## connect ontology to your project\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n", + "# Queue mode will be deprecated once dataset mode is deprecated\n", + "\n", + "project = client.create_project(name=\"dicom_project_demo\",\n", + " media_type=lb.MediaType.Dicom)\n", + "\n", + "## connect ontology to your project\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create batches\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-dicom-demo\", # Each batch in a project must have a unique name\n global_keys=[global_key\n ], # a list of data row objects, data row ids or global keys\n 
priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create batches\n", + "\n", + "# Create a batch to send to your MAL project\n", + "batch = project.create_batch(\n", + " \"first-batch-dicom-demo\", # Each batch in a project must have a unique name\n", + " global_keys=[global_key\n", + " ], # a list of data row objects, data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5: Create the annotations payload \n", "Create the annotations payload using the snippets of code above.\n", "\n", "Labelbox supports two formats for the annotations payload: NDJSON and Python Annotation types." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Python Annotation Types" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "annotations_list = polyline_annotation + mask_annotation\nlabels = [\n lb_types.Label(data={\"global_key\": global_key},\n annotations=annotations_list)\n]", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "annotations_list = polyline_annotation + mask_annotation\n", + "labels = [\n", + " lb_types.Label(data={\"global_key\": global_key},\n", + " annotations=annotations_list)\n", + "]" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### NDJSON annotations\n", "Here we create the complete `label_ndjson` payload of annotations. There is one annotation for each *reference to an annotation* that we created above." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\n\nfor annotation in [polyline_annotation_ndjson, mask_annotation_ndjson]:\n annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotation)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_ndjson = []\n", + "\n", + "for annotation in [polyline_annotation_ndjson, mask_annotation_ndjson]:\n", + " annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_ndjson.append(annotation)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or completed labels\n", "For the purpose of this tutorial only run one of the label imports at once, otherwise the previous import might get overwritten." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Model-Assisted Labeling (MAL)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload MAL label for this data row in project\nupload_job_mal = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"mal_import_job-\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job_mal.wait_until_done()\nprint(\"Errors:\", upload_job_mal.errors)\nprint(\"Status of uploads: \", upload_job_mal.statuses)\nprint(\" \")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload MAL label for this data row in project\n", + "upload_job_mal = lb.MALPredictionImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"mal_import_job-\" + str(uuid.uuid4()),\n", + " predictions=labels,\n", + ")\n", + "\n", + "upload_job_mal.wait_until_done()\n", + "print(\"Errors:\", upload_job_mal.errors)\n", + "print(\"Status of uploads: \", 
upload_job_mal.statuses)\n", + "print(\" \")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_label_import = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job-\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_label_import.wait_until_done()\nprint(\"Errors:\", upload_job_label_import.errors)\nprint(\"Status of uploads: \", upload_job_label_import.statuses)\nprint(\" \")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_label_import = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"label_import_job-\" + str(uuid.uuid4()),\n", + " labels=labels,\n", + ")\n", + "\n", + "upload_job_label_import.wait_until_done()\n", + "print(\"Errors:\", upload_job_label_import.errors)\n", + "print(\"Status of uploads: \", upload_job_label_import.statuses)\n", + "print(\" \")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Delete Project\n# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Delete Project\n", + "# project.delete()\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/annotation_import/html.ipynb b/examples/annotation_import/html.ipynb index 7ef731bce..7a1de2556 100644 --- a/examples/annotation_import/html.ipynb +++ b/examples/annotation_import/html.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": 
"markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# HTML Annotation Import\n", @@ -53,118 +51,262 @@ "* Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", "* Label Import - used to provide ground truth labels. These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* For information on what types of annotations are supported per data type, refer to this documentation:\n", " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* Notes:\n", " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport uuid\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import uuid\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for HTML" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "##### Classification free text #####\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"text_html\",\n value=lb_types.Text(answer=\"sample text\"),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"text_html\",\n \"answer\": \"sample text\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "##### Classification free text #####\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"text_html\",\n", + " value=lb_types.Text(answer=\"sample text\"),\n", + ")\n", + "\n", + 
"text_annotation_ndjson = {\n", + " \"name\": \"text_html\",\n", + " \"answer\": \"sample text\",\n", + "}" + ] }, { - "metadata": {}, - "source": "##### Checklist Classification #######\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_html\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_html\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "##### Checklist Classification #######\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_html\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"checklist_html\",\n", + " \"answers\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\"\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "######## Radio Classification ######\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_html\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nradio_annotation_ndjson = {\n \"name\": \"radio_html\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######## Radio 
Classification ######\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_html\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\")),\n", + ")\n", + "\n", + "radio_annotation_ndjson = {\n", + " \"name\": \"radio_html\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classification - Radio and Checklist (with subclassifcations) ##########\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - 
"execution_count": null - }, - { + "source": [ + "########## Classification - Radio and Checklist (with subclassifcations) ##########\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + "}\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all 
together " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create one Labelbox dataset\n\nglobal_key = \"sample_html_1.html\"\n\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_1.html\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(\n name=\"html_annotation_import_demo_dataset\",\n iam_integration=\n None, # Removing this argument will default to the organziation's default iam integration\n)\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create one Labelbox dataset\n", + "\n", + "global_key = \"sample_html_1.html\" + str(uuid.uuid4())\n", + "\n", + "asset = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_1.html\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(\n", + " name=\"html_annotation_import_demo_dataset\",\n", + " iam_integration=\n", + " None, # Removing this argument will default to the organziation's default iam integration\n", + ")\n", + "task = dataset.create_data_rows([asset])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows: \", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an ontology\n", @@ -172,135 +314,271 @@ "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure 
the correct feature schemas are matched.\n", "\n", "For example, when we create the text annotation, we provided the `name` as `text_html`. Now, when we setup our ontology, we must ensure that the name of the tool is also `text_html`. The same alignment must hold true for the other tools and classifications we create in our ontology." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT, name=\"text_html\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_html\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_html\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n )\n ],\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology HTML Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Html,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(classifications=[\n", + " 
lb.Classification(class_type=lb.Classification.Type.TEXT, name=\"text_html\"),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_html\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_html\",\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", + " ),\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + "])\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Ontology HTML Annotations\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Html,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", "## Step 3: Create a labeling project\n", "Connect the ontology to the labeling project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create Labelbox project\nproject = client.create_project(name=\"HTML Import Annotation Demo\",\n 
media_type=lb.MediaType.Html)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create Labelbox project\n", + "project = client.create_project(name=\"HTML Import Annotation Demo\",\n", + " media_type=lb.MediaType.Html)\n", + "\n", + "# Setup your ontology\n", + "project.setup_editor(\n", + " ontology) # Connect your ontology and editor to your project" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send a batch of data rows to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Setup Batches and Ontology\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-html-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Setup Batches and Ontology\n", + "\n", + "# Create a batch to send to your MAL project\n", + "batch = project.create_batch(\n", + " \"first-batch-html-demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the annotations payload\n", "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types." 
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label = []\n", + "label.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " text_annotation,\n", + " checklist_annotation,\n", + " radio_annotation,\n", + " nested_checklist_annotation,\n", + " nested_radio_annotation,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "### NDJSON annotations \n", "Here we create the complete label NDJSON payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1)." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\nfor annotations in [\n text_annotation_ndjson,\n checklist_annotation_ndjson,\n radio_annotation_ndjson,\n nested_radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_ndjson = []\n", + "for annotations in [\n", + " text_annotation_ndjson,\n", + " checklist_annotation_ndjson,\n", + " radio_annotation_ndjson,\n", + " nested_radio_annotation_ndjson,\n", + " nested_checklist_annotation_ndjson,\n", + "]:\n", + " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_ndjson.append(annotations)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or complete labels" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Assisted Labeling (MAL)\n", "For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload our label using Model-Assisted Labeling\n", + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=f\"mal_job-{str(uuid.uuid4())}\",\n", + " predictions=label,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload label for this data row in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload label for this data row in project\n", + "upload_job = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"label_import_job\" + str(uuid.uuid4()),\n", + " labels=label,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", 
upload_job.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/annotation_import/image.ipynb b/examples/annotation_import/image.ipynb index b8c6cd2a6..be2bf6611 100644 --- a/examples/annotation_import/image.ipynb +++ b/examples/annotation_import/image.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Image annotation import\n", @@ -59,294 +57,969 @@ "- Ability to create the payload in the NDJSON import format directly\n", "\n", "- It supports any levels of nested classification (free text / radio / checklist) under the object or classification annotation." 
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nfrom PIL import Image\nimport requests\nimport base64\nimport labelbox as lb\nimport labelbox.types as lb_types\nfrom io import BytesIO", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "from PIL import Image\n", + "import requests\n", + "import base64\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "from io import BytesIO" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API key\n", "\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for image\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification : Radio (single-choice)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python annotation\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"second_radio_answer\"\n 
},\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\")),\n", + ")\n", + "\n", + "# NDJSON\n", + "radio_annotation_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"second_radio_answer\"\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Checklist (multi-choice)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python annotation\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"checklist_question\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": 
\"second_checklist_answer\"\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Nested radio and checklist\n", "\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "nested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n# NDJSON\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " 
classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "# NDJSON\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + "}\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Free-form text" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python annotation\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\"),\n)\n\n# NDJSON\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample 
text\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", # must match your ontology feature\"s name\n", + " value=lb_types.Text(answer=\"sample text\"),\n", + ")\n", + "\n", + "# NDJSON\n", + "text_annotation_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Relationship with bounding box\n", "> **NOTE:** \n", "> Only supported for MAL imports" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Annotation\nbbox_source = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=2096, y=1264),\n end=lb_types.Point(x=2240, y=1689),\n ),\n)\n\nbbox_target = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=2272, y=1346),\n end=lb_types.Point(x=2416, y=1704),\n ),\n)\n\nrelationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=bbox_source,\n target=bbox_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source = str(uuid.uuid4())\nuuid_target = str(uuid.uuid4())\n\nbbox_source_ndjson = {\n \"uuid\": uuid_source,\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 1264.0,\n \"left\": 2096.0,\n \"height\": 425.0,\n \"width\": 144.0\n },\n}\n\nbbox_target_ndjson = {\n \"uuid\": uuid_target,\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 1346.0,\n \"left\": 2272.0,\n \"height\": 358.0,\n \"width\": 144.0\n },\n}\n\nrelationship_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source,\n \"target\": uuid_target,\n \"type\": \"unidirectional\",\n },\n}", "cell_type": "code", + "execution_count": null, 
+ "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Annotation\n", + "bbox_source = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=2096, y=1264),\n", + " end=lb_types.Point(x=2240, y=1689),\n", + " ),\n", + ")\n", + "\n", + "bbox_target = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=2272, y=1346),\n", + " end=lb_types.Point(x=2416, y=1704),\n", + " ),\n", + ")\n", + "\n", + "relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=bbox_source,\n", + " target=bbox_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ),\n", + ")\n", + "\n", + "## Only supported for MAL imports\n", + "uuid_source = str(uuid.uuid4())\n", + "uuid_target = str(uuid.uuid4())\n", + "\n", + "bbox_source_ndjson = {\n", + " \"uuid\": uuid_source,\n", + " \"name\": \"bounding_box\",\n", + " \"bbox\": {\n", + " \"top\": 1264.0,\n", + " \"left\": 2096.0,\n", + " \"height\": 425.0,\n", + " \"width\": 144.0\n", + " },\n", + "}\n", + "\n", + "bbox_target_ndjson = {\n", + " \"uuid\": uuid_target,\n", + " \"name\": \"bounding_box\",\n", + " \"bbox\": {\n", + " \"top\": 1346.0,\n", + " \"left\": 2272.0,\n", + " \"height\": 358.0,\n", + " \"width\": 144.0\n", + " },\n", + "}\n", + "\n", + "relationship_ndjson = {\n", + " \"name\": \"relationship\",\n", + " \"relationship\": {\n", + " \"source\": uuid_source,\n", + " \"target\": uuid_target,\n", + " \"type\": \"unidirectional\",\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python annotation\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.Rectangle(\n 
start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\n# NDJSON\nbbox_annotation_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 977,\n \"left\": 1690,\n \"height\": 330,\n \"width\": 225\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\", # must match your ontology feature\"s name\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", + " end=lb_types.Point(x=1915,\n", + " y=1307), # x= left + width , y = top + height\n", + " ),\n", + ")\n", + "\n", + "# NDJSON\n", + "bbox_annotation_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"bbox\": {\n", + " \"top\": 977,\n", + " \"left\": 1690,\n", + " \"height\": 330,\n", + " \"width\": 225\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box with nested classification" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python annotation\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n)\n\n## NDJSON\nbbox_with_radio_subclass_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n \"bbox\": {\n \"top\": 933,\n \"left\": 541,\n \"height\": 191,\n \"width\": 330\n },\n}", 
"cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", + " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "## NDJSON\n", + "bbox_with_radio_subclass_ndjson = {\n", + " \"name\": \"bbox_with_radio_subclass\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " },\n", + " }],\n", + " \"bbox\": {\n", + " \"top\": 933,\n", + " \"left\": 541,\n", + " \"height\": 191,\n", + " \"width\": 330\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Polygon" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python annotation\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon\", # must match your ontology feature\"s name\n value=lb_types.Polygon( # Coordinates for the vertices of your polygon\n points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, 
y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\n# NDJSON\npolygon_annotation_ndjson = {\n \"name\":\n \"polygon\",\n \"polygon\": [\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n {\n \"x\": 2278.306,\n \"y\": 256.885\n },\n {\n \"x\": 2428.197,\n \"y\": 200.437\n },\n {\n \"x\": 2560.0,\n \"y\": 335.419\n },\n {\n \"x\": 2557.386,\n \"y\": 503.165\n },\n {\n \"x\": 2320.596,\n \"y\": 503.103\n },\n {\n \"x\": 2156.083,\n \"y\": 628.943\n },\n {\n \"x\": 2161.111,\n \"y\": 785.519\n },\n {\n \"x\": 2002.115,\n \"y\": 894.647\n },\n {\n \"x\": 1838.456,\n \"y\": 877.874\n },\n {\n \"x\": 1436.53,\n \"y\": 874.636\n },\n {\n \"x\": 1411.403,\n \"y\": 758.579\n },\n {\n \"x\": 1353.853,\n \"y\": 751.74\n },\n {\n \"x\": 1345.264,\n \"y\": 453.461\n },\n {\n \"x\": 1426.011,\n \"y\": 421.129\n },\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "polygon_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polygon\", # must match your ontology feature\"s name\n", + " value=lb_types.Polygon( # Coordinates for the vertices of your polygon\n", + " points=[\n", + " lb_types.Point(x=1489.581, y=183.934),\n", + " lb_types.Point(x=2278.306, y=256.885),\n", + " lb_types.Point(x=2428.197, y=200.437),\n", + " lb_types.Point(x=2560.0, y=335.419),\n", + " lb_types.Point(x=2557.386, y=503.165),\n", + " lb_types.Point(x=2320.596, y=503.103),\n", + " lb_types.Point(x=2156.083, y=628.943),\n", + " lb_types.Point(x=2161.111, y=785.519),\n", + " lb_types.Point(x=2002.115, y=894.647),\n", + " lb_types.Point(x=1838.456, y=877.874),\n", + " lb_types.Point(x=1436.53, y=874.636),\n", + " lb_types.Point(x=1411.403, y=758.579),\n", + " lb_types.Point(x=1353.853, y=751.74),\n", + " lb_types.Point(x=1345.264, y=453.461),\n", + " lb_types.Point(x=1426.011, y=421.129),\n", + " ]),\n", + 
")\n", + "\n", + "# NDJSON\n", + "polygon_annotation_ndjson = {\n", + " \"name\":\n", + " \"polygon\",\n", + " \"polygon\": [\n", + " {\n", + " \"x\": 1489.581,\n", + " \"y\": 183.934\n", + " },\n", + " {\n", + " \"x\": 2278.306,\n", + " \"y\": 256.885\n", + " },\n", + " {\n", + " \"x\": 2428.197,\n", + " \"y\": 200.437\n", + " },\n", + " {\n", + " \"x\": 2560.0,\n", + " \"y\": 335.419\n", + " },\n", + " {\n", + " \"x\": 2557.386,\n", + " \"y\": 503.165\n", + " },\n", + " {\n", + " \"x\": 2320.596,\n", + " \"y\": 503.103\n", + " },\n", + " {\n", + " \"x\": 2156.083,\n", + " \"y\": 628.943\n", + " },\n", + " {\n", + " \"x\": 2161.111,\n", + " \"y\": 785.519\n", + " },\n", + " {\n", + " \"x\": 2002.115,\n", + " \"y\": 894.647\n", + " },\n", + " {\n", + " \"x\": 1838.456,\n", + " \"y\": 877.874\n", + " },\n", + " {\n", + " \"x\": 1436.53,\n", + " \"y\": 874.636\n", + " },\n", + " {\n", + " \"x\": 1411.403,\n", + " \"y\": 758.579\n", + " },\n", + " {\n", + " \"x\": 1353.853,\n", + " \"y\": 751.74\n", + " },\n", + " {\n", + " \"x\": 1345.264,\n", + " \"y\": 453.461\n", + " },\n", + " {\n", + " \"x\": 1426.011,\n", + " \"y\": 421.129\n", + " },\n", + " {\n", + " \"x\": 1489.581,\n", + " \"y\": 183.934\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Composite mask upload using different mask tools from the project's ontology\n", "This example shows how to assigned different annotations (mask instances) from a composite mask using different mask tools" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# First we need to extract all the unique colors from the composite mask\ndef extract_rgb_colors_from_url(image_url):\n response = requests.get(image_url)\n img = Image.open(BytesIO(response.content))\n\n colors = set()\n for x in range(img.width):\n for y in range(img.height):\n pixel = img.getpixel((x, y))\n if pixel[:3] != (0, 0, 0):\n colors.add(pixel[:3]) # Get only the RGB values\n\n return 
colors", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# First we need to extract all the unique colors from the composite mask\n", + "def extract_rgb_colors_from_url(image_url):\n", + " response = requests.get(image_url)\n", + " img = Image.open(BytesIO(response.content))\n", + "\n", + " colors = set()\n", + " for x in range(img.width):\n", + " for y in range(img.height):\n", + " pixel = img.getpixel((x, y))\n", + " if pixel[:3] != (0, 0, 0):\n", + " colors.add(pixel[:3]) # Get only the RGB values\n", + "\n", + " return colors" + ] }, { - "metadata": {}, - "source": "cp_mask_url = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/composite_mask.png\"\ncolors = extract_rgb_colors_from_url(cp_mask_url)\nresponse = requests.get(cp_mask_url)\n\nmask_data = lb.types.MaskData(\n im_bytes=response.content\n) # You can also use \"url\" instead of img_bytes to pass the PNG mask url.\nrgb_colors_for_mask_with_text_subclass_tool = [\n (73, 39, 85),\n (111, 87, 176),\n (23, 169, 254),\n]\n\ncp_mask = []\nfor color in colors:\n # We are assigning the color related to the mask_with_text_subclass tool by identifying the unique RGB colors\n if color in rgb_colors_for_mask_with_text_subclass_tool:\n cp_mask.append(\n lb_types.ObjectAnnotation(\n name=\n \"mask_with_text_subclass\", # must match your ontology feature\"s name\n value=lb_types.Mask(mask=mask_data, color=color),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_free_text\",\n value=lb_types.Text(answer=\"free text answer sample\"),\n )\n ],\n ))\n else:\n # Create ObjectAnnotation for other masks\n cp_mask.append(\n lb_types.ObjectAnnotation(name=\"mask\",\n value=lb_types.Mask(mask=mask_data,\n color=color)))\n\n# NDJSON using bytes array\ncp_mask_ndjson = []\n\n# Using bytes array.\nresponse = requests.get(cp_mask_url)\nim_bytes = base64.b64encode(response.content).decode(\"utf-8\")\nfor color in 
colors:\n if color in rgb_colors_for_mask_with_text_subclass_tool:\n cp_mask_ndjson.append({\n \"name\":\n \"mask_with_text_subclass\",\n \"mask\": {\n \"imBytes\": im_bytes,\n \"colorRGB\": color\n },\n \"classifications\": [{\n \"name\": \"sub_free_text\",\n \"answer\": \"free text answer\"\n }],\n })\n else:\n cp_mask_ndjson.append({\n \"name\": \"mask\",\n \"classifications\": [],\n \"mask\": {\n \"imBytes\": im_bytes,\n \"colorRGB\": color\n },\n })", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "cp_mask_url = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/composite_mask.png\"\n", + "colors = extract_rgb_colors_from_url(cp_mask_url)\n", + "response = requests.get(cp_mask_url)\n", + "\n", + "mask_data = lb.types.MaskData(\n", + " im_bytes=response.content\n", + ") # You can also use \"url\" instead of img_bytes to pass the PNG mask url.\n", + "rgb_colors_for_mask_with_text_subclass_tool = [\n", + " (73, 39, 85),\n", + " (111, 87, 176),\n", + " (23, 169, 254),\n", + "]\n", + "\n", + "cp_mask = []\n", + "for color in colors:\n", + " # We are assigning the color related to the mask_with_text_subclass tool by identifying the unique RGB colors\n", + " if color in rgb_colors_for_mask_with_text_subclass_tool:\n", + " cp_mask.append(\n", + " lb_types.ObjectAnnotation(\n", + " name=\n", + " \"mask_with_text_subclass\", # must match your ontology feature\"s name\n", + " value=lb_types.Mask(mask=mask_data, color=color),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_free_text\",\n", + " value=lb_types.Text(answer=\"free text answer sample\"),\n", + " )\n", + " ],\n", + " ))\n", + " else:\n", + " # Create ObjectAnnotation for other masks\n", + " cp_mask.append(\n", + " lb_types.ObjectAnnotation(name=\"mask\",\n", + " value=lb_types.Mask(mask=mask_data,\n", + " color=color)))\n", + "\n", + "# NDJSON using bytes array\n", + 
"cp_mask_ndjson = []\n", + "\n", + "# Using bytes array.\n", + "response = requests.get(cp_mask_url)\n", + "im_bytes = base64.b64encode(response.content).decode(\"utf-8\")\n", + "for color in colors:\n", + " if color in rgb_colors_for_mask_with_text_subclass_tool:\n", + " cp_mask_ndjson.append({\n", + " \"name\":\n", + " \"mask_with_text_subclass\",\n", + " \"mask\": {\n", + " \"imBytes\": im_bytes,\n", + " \"colorRGB\": color\n", + " },\n", + " \"classifications\": [{\n", + " \"name\": \"sub_free_text\",\n", + " \"answer\": \"free text answer\"\n", + " }],\n", + " })\n", + " else:\n", + " cp_mask_ndjson.append({\n", + " \"name\": \"mask\",\n", + " \"classifications\": [],\n", + " \"mask\": {\n", + " \"imBytes\": im_bytes,\n", + " \"colorRGB\": color\n", + " },\n", + " })" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Point" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python annotation\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point\", # must match your ontology feature\"s name\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\n# NDJSON\npoint_annotation_ndjson = {\n \"name\": \"point\",\n \"classifications\": [],\n \"point\": {\n \"x\": 1166.606,\n \"y\": 1441.768\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "point_annotation = lb_types.ObjectAnnotation(\n", + " name=\"point\", # must match your ontology feature\"s name\n", + " value=lb_types.Point(x=1166.606, y=1441.768),\n", + ")\n", + "\n", + "# NDJSON\n", + "point_annotation_ndjson = {\n", + " \"name\": \"point\",\n", + " \"classifications\": [],\n", + " \"point\": {\n", + " \"x\": 1166.606,\n", + " \"y\": 1441.768\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Polyline" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python annotation\npolyline_annotation = 
lb_types.ObjectAnnotation(\n name=\"polyline\", # must match your ontology feature\"s name\n value=lb_types.Line( # Coordinates for the keypoints in your polyline\n points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)\n\n# NDJSON\npolyline_annotation_ndjson = {\n \"name\":\n \"polyline\",\n \"classifications\": [],\n \"line\": [\n {\n \"x\": 2534.353,\n \"y\": 249.471\n },\n {\n \"x\": 2429.492,\n \"y\": 182.092\n },\n {\n \"x\": 2294.322,\n \"y\": 221.962\n },\n {\n \"x\": 2224.491,\n \"y\": 180.463\n },\n {\n \"x\": 2136.123,\n \"y\": 204.716\n },\n {\n \"x\": 1712.247,\n \"y\": 173.949\n },\n {\n \"x\": 1703.838,\n \"y\": 84.438\n },\n {\n \"x\": 1579.772,\n \"y\": 82.61\n },\n {\n \"x\": 1583.442,\n \"y\": 167.552\n },\n {\n \"x\": 1478.869,\n \"y\": 164.903\n },\n {\n \"x\": 1418.941,\n \"y\": 318.149\n },\n {\n \"x\": 1243.128,\n \"y\": 400.815\n },\n {\n \"x\": 1022.067,\n \"y\": 319.007\n },\n {\n \"x\": 892.367,\n \"y\": 379.216\n },\n {\n \"x\": 670.273,\n \"y\": 364.408\n },\n {\n \"x\": 613.114,\n \"y\": 288.16\n },\n {\n \"x\": 377.559,\n \"y\": 238.251\n },\n {\n \"x\": 368.087,\n \"y\": 185.064\n },\n {\n \"x\": 246.557,\n \"y\": 167.286\n },\n {\n 
\"x\": 236.648,\n \"y\": 285.61\n },\n {\n \"x\": 90.929,\n \"y\": 326.412\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "polyline_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polyline\", # must match your ontology feature\"s name\n", + " value=lb_types.Line( # Coordinates for the keypoints in your polyline\n", + " points=[\n", + " lb_types.Point(x=2534.353, y=249.471),\n", + " lb_types.Point(x=2429.492, y=182.092),\n", + " lb_types.Point(x=2294.322, y=221.962),\n", + " lb_types.Point(x=2224.491, y=180.463),\n", + " lb_types.Point(x=2136.123, y=204.716),\n", + " lb_types.Point(x=1712.247, y=173.949),\n", + " lb_types.Point(x=1703.838, y=84.438),\n", + " lb_types.Point(x=1579.772, y=82.61),\n", + " lb_types.Point(x=1583.442, y=167.552),\n", + " lb_types.Point(x=1478.869, y=164.903),\n", + " lb_types.Point(x=1418.941, y=318.149),\n", + " lb_types.Point(x=1243.128, y=400.815),\n", + " lb_types.Point(x=1022.067, y=319.007),\n", + " lb_types.Point(x=892.367, y=379.216),\n", + " lb_types.Point(x=670.273, y=364.408),\n", + " lb_types.Point(x=613.114, y=288.16),\n", + " lb_types.Point(x=377.559, y=238.251),\n", + " lb_types.Point(x=368.087, y=185.064),\n", + " lb_types.Point(x=246.557, y=167.286),\n", + " lb_types.Point(x=236.648, y=285.61),\n", + " lb_types.Point(x=90.929, y=326.412),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "polyline_annotation_ndjson = {\n", + " \"name\":\n", + " \"polyline\",\n", + " \"classifications\": [],\n", + " \"line\": [\n", + " {\n", + " \"x\": 2534.353,\n", + " \"y\": 249.471\n", + " },\n", + " {\n", + " \"x\": 2429.492,\n", + " \"y\": 182.092\n", + " },\n", + " {\n", + " \"x\": 2294.322,\n", + " \"y\": 221.962\n", + " },\n", + " {\n", + " \"x\": 2224.491,\n", + " \"y\": 180.463\n", + " },\n", + " {\n", + " \"x\": 2136.123,\n", + " \"y\": 204.716\n", + " },\n", + " {\n", + " \"x\": 1712.247,\n", + " \"y\": 
173.949\n", + " },\n", + " {\n", + " \"x\": 1703.838,\n", + " \"y\": 84.438\n", + " },\n", + " {\n", + " \"x\": 1579.772,\n", + " \"y\": 82.61\n", + " },\n", + " {\n", + " \"x\": 1583.442,\n", + " \"y\": 167.552\n", + " },\n", + " {\n", + " \"x\": 1478.869,\n", + " \"y\": 164.903\n", + " },\n", + " {\n", + " \"x\": 1418.941,\n", + " \"y\": 318.149\n", + " },\n", + " {\n", + " \"x\": 1243.128,\n", + " \"y\": 400.815\n", + " },\n", + " {\n", + " \"x\": 1022.067,\n", + " \"y\": 319.007\n", + " },\n", + " {\n", + " \"x\": 892.367,\n", + " \"y\": 379.216\n", + " },\n", + " {\n", + " \"x\": 670.273,\n", + " \"y\": 364.408\n", + " },\n", + " {\n", + " \"x\": 613.114,\n", + " \"y\": 288.16\n", + " },\n", + " {\n", + " \"x\": 377.559,\n", + " \"y\": 238.251\n", + " },\n", + " {\n", + " \"x\": 368.087,\n", + " \"y\": 185.064\n", + " },\n", + " {\n", + " \"x\": 246.557,\n", + " \"y\": 167.286\n", + " },\n", + " {\n", + " \"x\": 236.648,\n", + " \"y\": 285.61\n", + " },\n", + " {\n", + " \"x\": 90.929,\n", + " \"y\": 326.412\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# End-to-end example: Import pre-labels or ground truth" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into catalog\n", "\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# send a sample image as batch to the project\nglobal_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\"\n\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"image-demo-dataset\")\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\n\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in 
error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# send a sample image as batch to the project\n", + "global_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\" + str(uuid.uuid4())\n", + "\n", + "test_img_url = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"image-demo-dataset\")\n", + "task = dataset.create_data_rows([test_img_url])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows:\", task.failed_data_rows)" + ] }, { - "metadata": {}, - "source": "print(dataset)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "print(dataset)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an ontology\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we created the bounding box annotation above, we provided the `name` as `bounding_box`. Now, when we setup our ontology, we must ensure that the name of the bounding box tool is also `bounding_box`. The same alignment must hold true for the other tools and classifications we create in our ontology." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"mask\"),\n lb.Tool(\n tool=lb.Tool.Type.RASTER_SEGMENTATION,\n name=\"mask_with_text_subclass\",\n classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n 
name=\"sub_free_text\")\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Image Annotation Import Demo Ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\"),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " 
options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " tools=[ # List of Tool objects\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", + " ),\n", + " ],\n", + " ),\n", + " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n", + " lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"mask\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.RASTER_SEGMENTATION,\n", + " name=\"mask_with_text_subclass\",\n", + " classifications=[\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"sub_free_text\")\n", + " ],\n", + " ),\n", + " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n", + " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n", + " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Image Annotation Import Demo Ontology\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Image,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a labeling project\n", "Connect the ontology to the labeling project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\nproject = client.create_project(name=\"Image Annotation Import Demo\",\n media_type=lb.MediaType.Image)\n\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Project defaults 
to batch mode with benchmark quality settings if this argument is not provided\n", + "# Queue mode will be deprecated once dataset mode is deprecated\n", + "project = client.create_project(name=\"Image Annotation Import Demo\",\n", + " media_type=lb.MediaType.Image)\n", + "\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send a batch of data rows to the project\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "batch = project.create_batch(\n \"image-demo-batch\", # each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # paginated collection of data row objects, list of data row ids or global keys\n priority=1, # priority between 1(highest) - 5(lowest)\n)\n\nprint(f\"Batch: {batch}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "batch = project.create_batch(\n", + " \"image-demo-batch\", # each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # paginated collection of data row objects, list of data row ids or global keys\n", + " priority=1, # priority between 1(highest) - 5(lowest)\n", + ")\n", + "\n", + "print(f\"Batch: {batch}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the annotations payload\n", @@ -354,82 +1027,156 @@ "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below. If you are using Python Annotation types, compose your annotations into Labels attached to the data rows." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Python annotations\n", "\n", "Here we create the complete label ndjson payload of annotations only using python annotation format. 
There is one annotation for each reference to an annotation that we created." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label = []\nannotations = [\n radio_annotation,\n nested_radio_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n polygon_annotation,\n point_annotation,\n polyline_annotation,\n bbox_source,\n bbox_target,\n relationship,\n] + cp_mask\n\nlabel.append(\n lb_types.Label(data={\"global_key\": global_key}, annotations=annotations))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label = []\n", + "annotations = [\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " text_annotation,\n", + " bbox_annotation,\n", + " bbox_with_radio_subclass_annotation,\n", + " polygon_annotation,\n", + " point_annotation,\n", + " polyline_annotation,\n", + " bbox_source,\n", + " bbox_target,\n", + " relationship,\n", + "] + cp_mask\n", + "\n", + "label.append(\n", + " lb_types.Label(data={\"global_key\": global_key}, annotations=annotations))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### NDJSON annotations\n", "Here we create the complete label ndjson payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created above." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\nannotations = [\n radio_annotation_ndjson,\n nested_radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n checklist_annotation_ndjson,\n text_annotation_ndjson,\n bbox_annotation_ndjson,\n bbox_with_radio_subclass_ndjson,\n polygon_annotation_ndjson,\n point_annotation_ndjson,\n polyline_annotation_ndjson,\n bbox_source_ndjson,\n bbox_target_ndjson,\n relationship_ndjson, ## Only supported for MAL imports\n] + cp_mask_ndjson\n\nfor annotation in annotations:\n annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotation)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_ndjson = []\n", + "annotations = [\n", + " radio_annotation_ndjson,\n", + " nested_radio_annotation_ndjson,\n", + " nested_checklist_annotation_ndjson,\n", + " checklist_annotation_ndjson,\n", + " text_annotation_ndjson,\n", + " bbox_annotation_ndjson,\n", + " bbox_with_radio_subclass_ndjson,\n", + " polygon_annotation_ndjson,\n", + " point_annotation_ndjson,\n", + " polyline_annotation_ndjson,\n", + " bbox_source_ndjson,\n", + " bbox_target_ndjson,\n", + " relationship_ndjson, ## Only supported for MAL imports\n", + "] + cp_mask_ndjson\n", + "\n", + "for annotation in annotations:\n", + " annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_ndjson.append(annotation)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6: Upload annotations to a project as pre-labels or ground truth\n", "For the purpose of this tutorial only import one of the annotations payloads at the time (NDJSON or Python annotation types)." 
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Option A: Upload to a labeling project as pre-labels (MAL)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# upload MAL labels for this data row in project\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"mal_job\" + str(uuid.uuid4()),\n predictions=label,\n)\nupload_job.wait_until_done()\n\nprint(f\"Errors: {upload_job.errors}\")\nprint(f\"Status of uploads: {upload_job.statuses}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# upload MAL labels for this data row in project\n", + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"mal_job\" + str(uuid.uuid4()),\n", + " predictions=label,\n", + ")\n", + "upload_job.wait_until_done()\n", + "\n", + "print(f\"Errors: {upload_job.errors}\")\n", + "print(f\"Status of uploads: {upload_job.statuses}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Option B: Upload to a labeling project using ground truth" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Relationships are not supported with LabelImport\n# For this demo either run MAL or Ground Truth, not both\n\n# Upload label for this data row in project\n# upload_job = lb.LabelImport.create_from_objects(\n# client = client,\n# project_id = project.uid,\n# name=\"label_import_job\"+str(uuid.uuid4()),\n# labels=label)\n\n# print(\"Errors:\", upload_job.errors)\n# print(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Relationships are not supported with LabelImport\n", + "# For this demo either run MAL or Ground Truth, not both\n", + "\n", + "# Upload label for this data row 
in project\n", + "# upload_job = lb.LabelImport.create_from_objects(\n", + "# client = client,\n", + "# project_id = project.uid,\n", + "# name=\"label_import_job\"+str(uuid.uuid4()),\n", + "# labels=label)\n", + "\n", + "# print(\"Errors:\", upload_job.errors)\n", + "# print(\"Status of uploads: \", upload_job.statuses)" + ] }, { - "metadata": {}, - "source": "# project.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/annotation_import/pdf.ipynb b/examples/annotation_import/pdf.ipynb index 5c0742ba5..9143b0620 100644 --- a/examples/annotation_import/pdf.ipynb +++ b/examples/annotation_import/pdf.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 1, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,17 +22,17 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Annotation Import" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -56,137 +54,538 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nimport json\nimport requests\nimport 
labelbox as lb\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "import json\n", + "import requests\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key\n", "Guides on https://docs.labelbox.com/docs/create-an-api-key" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Supported Annotations" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########## Entity ##########\n\n# Annotation Types\nentities_annotations = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_annotations_ndjson = {\n \"name\":\n \"named_entity\",\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n \"groupId\": \"\",\n \"page\": 1,\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Entity ##########\n", + "\n", + "# Annotation Types\n", + "entities_annotations = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "# NDJSON\n", + "entities_annotations_ndjson = {\n", + " 
\"name\":\n", + " \"named_entity\",\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\",],\n", + " \"groupId\": \"\",\n", + " \"page\": 1,\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########### Radio Classification #########\n\n# Annotation types\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########### Radio Classification #########\n", + "\n", + "# Annotation types\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + ")\n", + "# NDJSON\n", + "radio_annotation_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Checklist Classification ###########\n", + "\n", + "# Annotation types\n", + "checklist_annotation = 
lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"checklist_question\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\"\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Bounding Box ###########\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n end=lb_types.Point(x=518.571,\n y=245.143), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_annotation_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 135.3,\n \"left\": 102.771,\n \"height\": 109.843,\n \"width\": 415.8\n },\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Bounding Box ###########\n", + "\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\", # must match your ontology feature\"s name\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n", + " end=lb_types.Point(x=518.571,\n", + " y=245.143), # x= left + width , y = top + height\n", + " page=0,\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " ),\n", + ")\n", + "\n", + "bbox_annotation_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"bbox\": {\n", + " \"top\": 135.3,\n", + " \"left\": 102.771,\n", + " \"height\": 109.843,\n", + " \"width\": 415.8\n", + " 
},\n", + " \"page\": 0,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "# ############ global nested classifications ###########\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# ############ global nested classifications ###########\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " 
classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " },\n", + " }],\n", + " }],\n", + "}\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############## Classification Free-form text ##############\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\"),\n)\n\ntext_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############## 
Classification Free-form text ##############\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", # must match your ontology feature\"s name\n", + " value=lb_types.Text(answer=\"sample text\"),\n", + ")\n", + "\n", + "text_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}" + ] }, { - "metadata": {}, - "source": "######### BBOX with nested classifications #########\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n end=lb_types.Point(x=566.657,\n y=420.986), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_annotation_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\"\n },\n }],\n },\n }],\n \"bbox\": {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### BBOX with nested classifications #########\n", + "\n", + "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + 
" value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n", + " end=lb_types.Point(x=566.657,\n", + " y=420.986), # x= left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"second_sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"second_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "bbox_with_radio_subclass_annotation_ndjson = {\n", + " \"name\": \"bbox_with_radio_subclass\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"second_sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"second_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + " }],\n", + " \"bbox\": {\n", + " \"top\": 226.757,\n", + " \"left\": 317.271,\n", + " \"height\": 194.229,\n", + " \"width\": 249.386,\n", + " },\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "############ NER with nested classifications ########\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n text_selections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n 
lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation_ndjson = {\n \"name\":\n \"ner_with_checklist_subclass\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": [{\n \"name\": \"first_sub_checklist_answer\"\n }],\n }],\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ NER with nested classifications ########\n", + "\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"ner_with_checklist_subclass\",\n", + " text_selections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "ner_with_checklist_subclass_annotation_ndjson = {\n", + " \"name\":\n", + " \"ner_with_checklist_subclass\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " }],\n", + " }],\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\"],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "######### Relationships ##########\nentity_source = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_target = lb_types.ObjectAnnotation(\n 
name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=entity_source,\n target=entity_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source = str(uuid.uuid4())\nuuid_target = str(uuid.uuid4())\n\nentity_source_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_source,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\n\nentity_target_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_target,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\nner_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source,\n \"target\": uuid_target,\n \"type\": \"unidirectional\",\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### Relationships ##########\n", + "entity_source = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "entity_target = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "entity_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=entity_source,\n", + " target=entity_target,\n", + " 
type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ),\n", + ")\n", + "\n", + "## Only supported for MAL imports\n", + "uuid_source = str(uuid.uuid4())\n", + "uuid_target = str(uuid.uuid4())\n", + "\n", + "entity_source_ndjson = {\n", + " \"name\":\n", + " \"named_entity\",\n", + " \"uuid\":\n", + " uuid_source,\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\"],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }],\n", + "}\n", + "\n", + "entity_target_ndjson = {\n", + " \"name\":\n", + " \"named_entity\",\n", + " \"uuid\":\n", + " uuid_target,\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\"],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }],\n", + "}\n", + "ner_relationship_annotation_ndjson = {\n", + " \"name\": \"relationship\",\n", + " \"relationship\": {\n", + " \"source\": uuid_source,\n", + " \"target\": uuid_target,\n", + " \"type\": \"unidirectional\",\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "######### BBOX with relationships #############\n# Python Annotation\nbbox_source = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n end=lb_types.Point(x=270.907,\n y=149.556), # x = left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_target = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=96.424, y=66.251),\n end=lb_types.Point(x=179.074, y=146.932),\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=bbox_source,\n target=bbox_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source_2 = str(uuid.uuid4())\nuuid_target_2 = str(uuid.uuid4())\n\nbbox_source_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": 
uuid_source_2,\n \"bbox\": {\n \"top\": 68.875,\n \"left\": 188.257,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_target_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": uuid_target_2,\n \"bbox\": {\n \"top\": 66.251,\n \"left\": 96.424,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source_2,\n \"target\": uuid_target_2,\n \"type\": \"unidirectional\",\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### BBOX with relationships #############\n", + "# Python Annotation\n", + "bbox_source = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n", + " end=lb_types.Point(x=270.907,\n", + " y=149.556), # x = left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + ")\n", + "\n", + "bbox_target = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=96.424, y=66.251),\n", + " end=lb_types.Point(x=179.074, y=146.932),\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + ")\n", + "\n", + "bbox_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=bbox_source,\n", + " target=bbox_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ),\n", + ")\n", + "\n", + "## Only supported for MAL imports\n", + "uuid_source_2 = str(uuid.uuid4())\n", + "uuid_target_2 = str(uuid.uuid4())\n", + "\n", + "bbox_source_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"uuid\": uuid_source_2,\n", + " \"bbox\": {\n", + " \"top\": 
68.875,\n", + " \"left\": 188.257,\n", + " \"height\": 80.681,\n", + " \"width\": 82.65\n", + " },\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}\n", + "\n", + "bbox_target_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"uuid\": uuid_target_2,\n", + " \"bbox\": {\n", + " \"top\": 66.251,\n", + " \"left\": 96.424,\n", + " \"height\": 80.681,\n", + " \"width\": 82.65\n", + " },\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}\n", + "\n", + "bbox_relationship_annotation_ndjson = {\n", + " \"name\": \"relationship\",\n", + " \"relationship\": {\n", + " \"source\": uuid_source_2,\n", + " \"target\": uuid_target_2,\n", + " \"type\": \"unidirectional\",\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Passing a `text_layer_url` is not longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", @@ -200,60 +599,206 @@ "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. 
As a result, all tokens in the text layer are also rotated by 90 degrees.\n", "\n", "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "global_key = \"0801.3483_doc.pdf\"\nimg_url = {\n \"row_data\": {\n \"pdf_url\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n },\n \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "global_key = \"0801.3483_doc.pdf\" + str(uuid.uuid4())\n", + "img_url = {\n", + " \"row_data\": {\n", + " \"pdf_url\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", + " },\n", + " \"global_key\": global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", + "task = dataset.create_data_rows([img_url])\n", + "task.wait_till_done()\n", + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: {task.errors}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if (\"Duplicate global key\" in error[\"message\"] and\n", + " dataset.row_count == 0):\n", + " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: 
{dataset}\")\n", + " dataset.delete()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an Ontology for your project\n", "\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, 
name=\"relationship\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "## Setup the ontology and link the tools created above.\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " ),\n", + " lb.Classification(\n", + " 
class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " tools=[ # List of Tool objects\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", + " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.NER,\n", + " name=\"ner_with_checklist_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_sub_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"second_sub_radio_question\",\n", + " options=[\n", + " 
lb.Option(\"second_sub_radio_answer\")\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Document Annotation Import Demo\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Document,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Creating a labeling project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"PDF_annotation_demo\",\n media_type=lb.MediaType.Document)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Labelbox project\n", + "project = client.create_project(name=\"PDF_annotation_demo\",\n", + " media_type=lb.MediaType.Document)\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5. 
Create the annotation payload\n", @@ -262,124 +807,357 @@ "Labelbox support NDJSON only for this data type.\n", "\n", "The resulting label should have exactly the same content for annotations that are supported by both (with exception of the uuid strings that are generated)" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### Step 5.1: First, we need to populate the text selections for Entity annotations\n", "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "To extract the generated text layer url we first need to export the data row" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "client.enable_experimental = True\ntask = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = json.loads(output.json_str)\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "client.enable_experimental = True\n", + "task = lb.DataRow.export(client=client, global_keys=[global_key])\n", + "task.wait_till_done()\n", + "stream = task.get_stream()\n", + "\n", + "text_layer = \"\"\n", + "for output in stream:\n", + " output_json = json.loads(output.json_str)\n", + " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n", + "print(text_layer)" + ] }, { - "metadata": {}, - "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n \"tokenIds\": list_tokens,\n \"page\": page\n }]\n })\n\n\n# 
Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n \"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. Kobayashi\",\n \"Organic charge transfer salts based on the donor\",\n \"the experimental investigations on this issue have not\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\ntext_selections_source = []\ntext_selections_target = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_annotations_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_annotation_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[2]:\n relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n text_selection_entity_source = 
lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n text_selections_source.append(text_selection_entity_source)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entity_source_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_source, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[3]:\n relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n text_selection_entity_target = lb_types.DocumentTextSelection(\n group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n text_selections_target.append(text_selection_entity_target)\n # build text selections forthe NDJson annotations\n update_text_selections(\n annotation=entity_target_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_target, # ids representing individual words from the group\n page=1,\n )", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Helper method\n", + "def update_text_selections(annotation, group_id, list_tokens, page):\n", + " return annotation.update({\n", + " \"textSelections\": [{\n", + " \"groupId\": group_id,\n", + " \"tokenIds\": list_tokens,\n", + " \"page\": page\n", + " }]\n", + " })\n", + "\n", + "\n", + "# Fetch the content of the text layer\n", + "res = requests.get(text_layer)\n", + "\n", + "# Phrases that we want to annotation obtained from the text layer url\n", + "content_phrases = [\n", + " \"Metal-insulator (MI) transitions have been one of the\",\n", + " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", + " \"Organic charge transfer salts based on the donor\",\n", + " \"the experimental investigations on this issue have not\",\n", + "]\n", + "\n", + "# Parse the text layer\n", + "text_selections = []\n", + "text_selections_ner = []\n", + "text_selections_source = []\n", + "text_selections_target = []\n", + "\n", + "for obj in json.loads(res.text):\n", + " for group in obj[\"groups\"]:\n", + " if group[\"content\"] == content_phrases[0]:\n", + " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " document_text_selection = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", + " text_selections.append(document_text_selection)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=entities_annotations_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " list_tokens, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + " if group[\"content\"] == content_phrases[1]:\n", + " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " ner_text_selection = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", + " text_selections_ner.append(ner_text_selection)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=ner_with_checklist_subclass_annotation_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " list_tokens_2, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + " if group[\"content\"] == content_phrases[2]:\n", + " relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " 
text_selection_entity_source = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n", + " text_selections_source.append(text_selection_entity_source)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=entity_source_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " relationship_source, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + " if group[\"content\"] == content_phrases[3]:\n", + " relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " text_selection_entity_target = lb_types.DocumentTextSelection(\n", + " group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n", + " text_selections_target.append(text_selection_entity_target)\n", + " # build text selections forthe NDJson annotations\n", + " update_text_selections(\n", + " annotation=entity_target_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " relationship_target, # ids representing individual words from the group\n", + " page=1,\n", + " )" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Re-write the python annotations to include text selections (only required for python annotation types)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# re-write the entity annotation with text selections\nentities_annotation_document_entity = lb_types.DocumentEntity(\n name=\"named_entity\", textSelections=text_selections)\nentities_annotation = lb_types.ObjectAnnotation(\n name=\"named_entity\", value=entities_annotation_document_entity)\n\n# re-write the entity annotation + subclassification with text selections\nclassifications = [\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n 
lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=ner_annotation_with_subclass,\n classifications=classifications,\n)\n\n# re-write the entity source and target annotations withe text selectios\nentity_source_doc = lb_types.DocumentEntity(\n name=\"named_entity\", text_selections=text_selections_source)\nentity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n value=entity_source_doc)\n\nentity_target_doc = lb_types.DocumentEntity(\n name=\"named_entity\", text_selections=text_selections_target)\nentity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n value=entity_target_doc)\n\n# re-write the entity relationship with the re-created entities\nentity_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=entity_source,\n target=entity_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# re-write the entity annotation with text selections\n", + "entities_annotation_document_entity = lb_types.DocumentEntity(\n", + " name=\"named_entity\", textSelections=text_selections)\n", + "entities_annotation = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\", value=entities_annotation_document_entity)\n", + "\n", + "# re-write the entity annotation + subclassification with text selections\n", + "classifications = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + "]\n", + "ner_annotation_with_subclass = 
lb_types.DocumentEntity(\n", + " name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " value=ner_annotation_with_subclass,\n", + " classifications=classifications,\n", + ")\n", + "\n", + "# re-write the entity source and target annotations withe text selectios\n", + "entity_source_doc = lb_types.DocumentEntity(\n", + " name=\"named_entity\", text_selections=text_selections_source)\n", + "entity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n", + " value=entity_source_doc)\n", + "\n", + "entity_target_doc = lb_types.DocumentEntity(\n", + " name=\"named_entity\", text_selections=text_selections_target)\n", + "entity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n", + " value=entity_target_doc)\n", + "\n", + "# re-write the entity relationship with the re-created entities\n", + "entity_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=entity_source,\n", + " target=entity_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ),\n", + ")" + ] }, { - "metadata": {}, - "source": "# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\nprint(f\"entities_annotation={entities_annotation}\")\nprint(\n f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\nprint(f\"entity_source_ndjson={entity_source_ndjson}\")\nprint(f\"entity_target_ndjson={entity_target_ndjson}\")\nprint(f\"entity_source={entity_source}\")\nprint(f\"entity_target={entity_target}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Final NDJSON and python annotations\n", + 
"print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n", + "print(f\"entities_annotation={entities_annotation}\")\n", + "print(\n", + " f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n", + ")\n", + "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\n", + "print(f\"entity_source_ndjson={entity_source_ndjson}\")\n", + "print(f\"entity_target_ndjson={entity_target_ndjson}\")\n", + "print(f\"entity_source={entity_source}\")\n", + "print(f\"entity_target={entity_target}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. Note that only a handful of python annotation types are supported for PDF documents." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "labels = []\n\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n radio_annotation,\n nested_radio_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n ner_with_checklist_subclass_annotation,\n entity_source,\n entity_target,\n entity_relationship, # Only supported for MAL imports\n bbox_source,\n bbox_target,\n bbox_relationship, # Only supported for MAL imports\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "labels = []\n", + "\n", + "labels.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " entities_annotation,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " text_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " bbox_annotation,\n", + " 
bbox_with_radio_subclass_annotation,\n", + " ner_with_checklist_subclass_annotation,\n", + " entity_source,\n", + " entity_target,\n", + " entity_relationship, # Only supported for MAL imports\n", + " bbox_source,\n", + " bbox_target,\n", + " bbox_relationship, # Only supported for MAL imports\n", + " ],\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### NDJson annotations\n", "Here we create the complete labels ndjson payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created above." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\nfor annot in [\n entities_annotations_ndjson,\n checklist_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n text_annotation_ndjson,\n radio_annotation_ndjson,\n nested_radio_annotation_ndjson,\n bbox_annotation_ndjson,\n bbox_with_radio_subclass_annotation_ndjson,\n ner_with_checklist_subclass_annotation_ndjson,\n entity_source_ndjson,\n entity_target_ndjson,\n ner_relationship_annotation_ndjson, # Only supported for MAL imports\n bbox_source_ndjson,\n bbox_target_ndjson,\n bbox_relationship_annotation_ndjson, # Only supported for MAL imports\n]:\n annot.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_ndjson.append(annot)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_ndjson = []\n", + "for annot in [\n", + " entities_annotations_ndjson,\n", + " checklist_annotation_ndjson,\n", + " nested_checklist_annotation_ndjson,\n", + " text_annotation_ndjson,\n", + " radio_annotation_ndjson,\n", + " nested_radio_annotation_ndjson,\n", + " bbox_annotation_ndjson,\n", + " bbox_with_radio_subclass_annotation_ndjson,\n", + " ner_with_checklist_subclass_annotation_ndjson,\n", + " entity_source_ndjson,\n", + " entity_target_ndjson,\n", + " ner_relationship_annotation_ndjson, # Only supported for MAL 
imports\n", + " bbox_source_ndjson,\n", + " bbox_target_ndjson,\n", + " bbox_relationship_annotation_ndjson, # Only supported for MAL imports\n", + "]:\n", + " annot.update({\n", + " \"dataRow\": {\n", + " \"globalKey\": global_key\n", + " },\n", + " })\n", + " label_ndjson.append(annot)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Import the annotation payload\n", "For the purpose of this tutorial only import one of the annotations payloads at the time (NDJSON or Python annotation types)." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Option A: Upload to a labeling project as pre-labels (MAL)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n", + " predictions=labels,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Option B: Upload to a labeling project using ground truth" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Uncomment this code when excluding relationships from label import\n## Relationships are not currently supported for label import\n\n# 
upload_job = lb.LabelImport.create_from_objects(\n# client = client,\n# project_id = project.uid,\n# name=\"label_import_job\"+str(uuid.uuid4()),\n# labels=labels) ## Remove unsupported relationships from the labels list\n\n# print(\"Errors:\", upload_job.errors)\n# print(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Uncomment this code when excluding relationships from label import\n", + "## Relationships are not currently supported for label import\n", + "\n", + "# upload_job = lb.LabelImport.create_from_objects(\n", + "# client = client,\n", + "# project_id = project.uid,\n", + "# name=\"label_import_job\"+str(uuid.uuid4()),\n", + "# labels=labels) ## Remove unsupported relationships from the labels list\n", + "\n", + "# print(\"Errors:\", upload_job.errors)\n", + "# print(\"Status of uploads: \", upload_job.statuses)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/examples/annotation_import/text.ipynb b/examples/annotation_import/text.ipynb index 1d4bb99d6..d71bad3f7 100644 --- a/examples/annotation_import/text.ipynb +++ b/examples/annotation_import/text.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Text Annotation Import\n", @@ -55,125 +53,299 @@ "* Label Import - used to provide ground truth labels. 
These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", "\n", "For information on what types of annotations are supported per data type, refer to the Import text annotations [documentation](https://docs.labelbox.com/reference/import-text-annotations)." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Notes:\n", " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly.\n", " * You may need to refresh your browser in order to see the results of the import job." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Setup\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid\nimport json", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "import uuid\n", + "import json" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for text" - ], - 
"cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Supported Python annotation types and NDJSON" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########## Entities ##########\n\n# Python annotation\nnamed_entity = lb_types.TextEntity(start=10, end=20)\nnamed_entitity_annotation = lb_types.ObjectAnnotation(value=named_entity,\n name=\"named_entity\")\n\n# NDJSON\nentities_ndjson = {\n \"name\": \"named_entity\",\n \"location\": {\n \"start\": 67,\n \"end\": 128\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Entities ##########\n", + "\n", + "# Python annotation\n", + "named_entity = lb_types.TextEntity(start=10, end=20)\n", + "named_entitity_annotation = lb_types.ObjectAnnotation(value=named_entity,\n", + " name=\"named_entity\")\n", + "\n", + "# NDJSON\n", + "entities_ndjson = {\n", + " \"name\": \"named_entity\",\n", + " \"location\": {\n", + " \"start\": 67,\n", + " \"end\": 128\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classification - Radio (single choice ) ##########\n\n# Python annotation\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Classification - Radio (single choice ) ##########\n", + "\n", + "# Python annotation\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + ")\n", + "\n", + "# NDJSON\n", + 
"radio_annotation_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classification - Radio and Checklist (with subclassifications) ##########\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n# NDJSON\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Classification - Radio and Checklist (with subclassifications) ##########\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " 
name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "# NDJSON\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + "}\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "nested_checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classification - Checklist (Multi-choice) ##########\n\n# Python annotation\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n 
lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n {\n \"name\": \"third_checklist_answer\"\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Classification - Checklist (Multi-choice) ##########\n", + "\n", + "# Python annotation\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"checklist_question\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"third_checklist_answer\"\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classification Free-Form text ##########\n\n# Python annotation\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\n# NDJSON\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Classification Free-Form text ##########\n", + "\n", + "# Python annotation\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", 
value=lb_types.Text(answer=\"sample text\"))\n", + "\n", + "# NDJSON\n", + "text_annotation_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annoations - putting it all together " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# You can now include ohter fields like attachments, media type and metadata in the data row creation step: https://docs.labelbox.com/reference/text-file\nglobal_key = \"lorem-ipsum.txt\"\ntext_asset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt\",\n \"global_key\":\n global_key,\n \"media_type\":\n \"TEXT\",\n \"attachments\": [{\n \"type\":\n \"TEXT_URL\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n }],\n}\n\ndataset = client.create_dataset(\n name=\"text_annotation_import_demo_dataset\",\n iam_integration=\n None, # Removing this argument will default to the organziation's default iam integration\n)\ntask = dataset.create_data_rows([text_asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# You can now include ohter fields like attachments, media type and metadata in the data row creation step: https://docs.labelbox.com/reference/text-file\n", + "global_key = \"lorem-ipsum.txt\" + str(uuid.uuid4())\n", + "text_asset = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt\",\n", + " \"global_key\":\n", + " global_key,\n", + " \"media_type\":\n", + " \"TEXT\",\n", + " \"attachments\": [{\n", + " \"type\":\n", + " 
\"TEXT_URL\",\n", + " \"value\":\n", + " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n", + " }],\n", + "}\n", + "\n", + "dataset = client.create_dataset(\n", + " name=\"text_annotation_import_demo_dataset\",\n", + " iam_integration=\n", + " None, # Removing this argument will default to the organziation's default iam integration\n", + ")\n", + "task = dataset.create_data_rows([text_asset])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows:\", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an ontology\n", @@ -182,46 +354,129 @@ "For example, when we create the checklist annotation above, we provided the `name` as `checklist_question`. Now, when we setup our ontology, we must ensure that the name of my classification tool is also `checklist_question`. The same alignment must hold true for the other tools and classifications we create in our ontology.\n", "\n", "[Documentation for reference ](https://docs.labelbox.com/reference/import-text-annotations)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[lb.Option(value=\"first_radio_answer\")],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n 
\"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n lb.Option(value=\"third_checklist_answer\"),\n ],\n ),\n lb.Classification( # Text classification given the name \"text\"\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n ],\n)\n\nontology = client.create_ontology(\"Ontology Text Annotations\",\n ontology_builder.asdict())", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "## Setup the ontology and link the tools created above.\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " options=[lb.Option(value=\"first_radio_answer\")],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", + " ),\n", + " ],\n", + " ),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " 
class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " lb.Option(value=\"third_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification( # Text classification given the name \"text\"\n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\"),\n", + " ],\n", + " tools=[ # List of Tool objects\n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\"Ontology Text Annotations\",\n", + " ontology_builder.asdict())" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Create a labeling project \n", "Connect the ontology to the labeling project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"Text Annotation Import Demo\",\n media_type=lb.MediaType.Text)\n\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n", + "# Queue mode will be deprecated once dataset mode is deprecated\n", + "\n", + "project = client.create_project(name=\"Text Annotation Import Demo\",\n", + " media_type=lb.MediaType.Text)\n", + "\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch 
of data rows to the project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Setup Batches and Ontology\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-text-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Setup Batches and Ontology\n", + "\n", + "# Create a batch to send to your MAL project\n", + "batch = project.create_batch(\n", + " \"first-batch-text-demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5: Create the annotations payload\n", @@ -229,88 +484,148 @@ "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below. If you are using Python Annotation types, compose your annotations into Labels attached to the data rows." 
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotations" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Label\nlabels = []\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n named_entitity_annotation,\n radio_annotation,\n checklist_annotation,\n text_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# Create a Label\n", + "labels = []\n", + "labels.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " named_entitity_annotation,\n", + " radio_annotation,\n", + " checklist_annotation,\n", + " text_annotation,\n", + " nested_checklist_annotation,\n", + " nested_radio_annotation,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "#### NDJSON annotations" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\nfor annotations in [\n entities_ndjson,\n radio_annotation_ndjson,\n checklist_annotation_ndjson,\n text_annotation_ndjson,\n nested_radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label_ndjson = []\n", + "for annotations in [\n", + " entities_ndjson,\n", + " radio_annotation_ndjson,\n", + " checklist_annotation_ndjson,\n", + " text_annotation_ndjson,\n", + " nested_radio_annotation_ndjson,\n", + " nested_checklist_annotation_ndjson,\n", + "]:\n", + " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_ndjson.append(annotations)" + ] + }, + { + "cell_type": "markdown", 
"metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or ground truth\n", "For the purpose of this tutorial only import one of the annotations payloads at the time (NDJSON or Python Annotation types). \n", "\n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Model-Assisted Labeling (MAL)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload MAL label for this data row in project\nupload_job_mal = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"mal_import_job\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job_mal.wait_until_done()\nprint(\"Errors:\", upload_job_mal.errors)\nprint(\"Status of uploads: \", upload_job_mal.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload MAL label for this data row in project\n", + "upload_job_mal = lb.MALPredictionImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"mal_import_job\" + str(uuid.uuid4()),\n", + " predictions=labels,\n", + ")\n", + "\n", + "upload_job_mal.wait_until_done()\n", + "print(\"Errors:\", upload_job_mal.errors)\n", + "print(\"Status of uploads: \", upload_job_mal.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload label for this data row in project\nupload_job_label_import = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_label_import.wait_until_done()\nprint(\"Errors:\", upload_job_label_import.errors)\nprint(\"Status of uploads: \", upload_job_label_import.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - 
"execution_count": null + "source": [ + "# Upload label for this data row in project\n", + "upload_job_label_import = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"label_import_job\" + str(uuid.uuid4()),\n", + " labels=labels,\n", + ")\n", + "\n", + "upload_job_label_import.wait_until_done()\n", + "print(\"Errors:\", upload_job_label_import.errors)\n", + "print(\"Status of uploads: \", upload_job_label_import.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/annotation_import/tiled.ipynb b/examples/annotation_import/tiled.ipynb index 19cb28d4e..aa3885bfc 100644 --- a/examples/annotation_import/tiled.ipynb +++ b/examples/annotation_import/tiled.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Tiled Imagery Annotation Import\n", @@ -52,10 +50,10 @@ "\n", "For information on what types of annotations are supported per data type, refer to this documentation:\n", " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", 
"metadata": {}, "source": [ "Notes:\n", @@ -63,283 +61,882 @@ " * If you are importing more than 1,000 annotations at a time, consider submitting separate jobs, as they can take longer than other annotation types to import.\n", " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly.\n", " * You may need to refresh your browser in order to see the results of the import job." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nimport numpy as np\nimport cv2\nimport labelbox as lb\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "import numpy as np\n", + "import cv2\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for tiled imagery" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Supported Python annotation types and NDJSON " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - 
"source": "####### Point #######\n\n# Python Annotation\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point_geo\",\n value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n)\n\n# NDJSON\npoint_annotation_ndjson = {\n \"name\": \"point_geo\",\n \"point\": {\n \"x\": -99.20647859573366,\n \"y\": 19.40018029091072\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Point #######\n", + "\n", + "# Python Annotation\n", + "point_annotation = lb_types.ObjectAnnotation(\n", + " name=\"point_geo\",\n", + " value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n", + ")\n", + "\n", + "# NDJSON\n", + "point_annotation_ndjson = {\n", + " \"name\": \"point_geo\",\n", + " \"point\": {\n", + " \"x\": -99.20647859573366,\n", + " \"y\": 19.40018029091072\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Polyline #######\n# Coordinates\ncoords = [\n [-99.20842051506044, 19.40032196622975],\n [-99.20809864997865, 19.39758963475322],\n [-99.20758366584778, 19.39776167179227],\n [-99.20728325843811, 19.3973265189299],\n]\n\nline_points = []\nline_points_ndjson = []\n\nfor sub in coords:\n line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python Annotation\npolyline_annotation = lb_types.ObjectAnnotation(\n name=\"polyline_geo\",\n value=lb_types.Line(points=line_points),\n)\n\n# NDJSON\npolyline_annotation_ndjson = {\n \"name\": \"polyline_geo\",\n \"line\": line_points_ndjson,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Polyline #######\n", + "# Coordinates\n", + "coords = [\n", + " [-99.20842051506044, 19.40032196622975],\n", + " [-99.20809864997865, 19.39758963475322],\n", + " [-99.20758366584778, 19.39776167179227],\n", + " [-99.20728325843811, 19.3973265189299],\n", + "]\n", + "\n", 
+ "line_points = []\n", + "line_points_ndjson = []\n", + "\n", + "for sub in coords:\n", + " line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", + " line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", + "\n", + "# Python Annotation\n", + "polyline_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polyline_geo\",\n", + " value=lb_types.Line(points=line_points),\n", + ")\n", + "\n", + "# NDJSON\n", + "polyline_annotation_ndjson = {\n", + " \"name\": \"polyline_geo\",\n", + " \"line\": line_points_ndjson,\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Polygon #######\n# Coordinates in the desired EPSG coordinate system\ncoords_polygon = [\n [-99.21042680740356, 19.40036244486966],\n [-99.2104160785675, 19.40017017124035],\n [-99.2103409767151, 19.400008256428897],\n [-99.21014785766603, 19.400008256428897],\n [-99.21019077301027, 19.39983622176518],\n [-99.21022295951845, 19.399674306621385],\n [-99.21029806137086, 19.39951239131646],\n [-99.2102873325348, 19.399340356128437],\n [-99.21025514602663, 19.399117722085677],\n [-99.21024441719057, 19.39892544698541],\n [-99.2102336883545, 19.39874329141769],\n [-99.21021223068239, 19.398561135646027],\n [-99.21018004417421, 19.398399219233365],\n [-99.21011567115785, 19.39822718286836],\n [-99.20992255210878, 19.398136104719125],\n [-99.20974016189577, 19.398085505725305],\n [-99.20957922935487, 19.398004547302467],\n [-99.20939683914186, 19.39792358883935],\n [-99.20918226242067, 19.39786286996558],\n [-99.20899987220764, 19.397822390703805],\n [-99.20891404151918, 19.397994427496787],\n [-99.20890331268312, 19.398176583902874],\n [-99.20889258384706, 19.398368859888045],\n [-99.20889258384706, 19.398540896103246],\n [-99.20890331268312, 19.39872305189756],\n [-99.20889258384706, 19.39890520748796],\n [-99.20889258384706, 19.39907724313608],\n [-99.20889258384706, 19.399259398329956],\n [-99.20890331268312, 19.399431433603585],\n [-99.20890331268312, 19.39961358840092],\n 
[-99.20890331268312, 19.399785623300048],\n [-99.20897841453552, 19.399937418648214],\n [-99.20919299125673, 19.399937418648214],\n [-99.2093861103058, 19.39991717927664],\n [-99.20956850051881, 19.39996777770086],\n [-99.20961141586305, 19.40013981222548],\n [-99.20963287353517, 19.40032196622975],\n [-99.20978307724, 19.4004130431554],\n [-99.20996546745302, 19.40039280384301],\n [-99.21019077301027, 19.400372564528084],\n [-99.21042680740356, 19.40036244486966],\n]\n\npolygon_points = []\npolygon_points_ndjson = []\n\nfor sub in coords_polygon:\n polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python Annotation\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon_geo\",\n value=lb_types.Polygon(points=polygon_points),\n)\n\n# NDJSON\npolygon_annotation_ndjson = {\n \"name\": \"polygon_geo\",\n \"polygon\": polygon_points_ndjson,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Polygon #######\n", + "# Coordinates in the desired EPSG coordinate system\n", + "coords_polygon = [\n", + " [-99.21042680740356, 19.40036244486966],\n", + " [-99.2104160785675, 19.40017017124035],\n", + " [-99.2103409767151, 19.400008256428897],\n", + " [-99.21014785766603, 19.400008256428897],\n", + " [-99.21019077301027, 19.39983622176518],\n", + " [-99.21022295951845, 19.399674306621385],\n", + " [-99.21029806137086, 19.39951239131646],\n", + " [-99.2102873325348, 19.399340356128437],\n", + " [-99.21025514602663, 19.399117722085677],\n", + " [-99.21024441719057, 19.39892544698541],\n", + " [-99.2102336883545, 19.39874329141769],\n", + " [-99.21021223068239, 19.398561135646027],\n", + " [-99.21018004417421, 19.398399219233365],\n", + " [-99.21011567115785, 19.39822718286836],\n", + " [-99.20992255210878, 19.398136104719125],\n", + " [-99.20974016189577, 19.398085505725305],\n", + " [-99.20957922935487, 
19.398004547302467],\n", + " [-99.20939683914186, 19.39792358883935],\n", + " [-99.20918226242067, 19.39786286996558],\n", + " [-99.20899987220764, 19.397822390703805],\n", + " [-99.20891404151918, 19.397994427496787],\n", + " [-99.20890331268312, 19.398176583902874],\n", + " [-99.20889258384706, 19.398368859888045],\n", + " [-99.20889258384706, 19.398540896103246],\n", + " [-99.20890331268312, 19.39872305189756],\n", + " [-99.20889258384706, 19.39890520748796],\n", + " [-99.20889258384706, 19.39907724313608],\n", + " [-99.20889258384706, 19.399259398329956],\n", + " [-99.20890331268312, 19.399431433603585],\n", + " [-99.20890331268312, 19.39961358840092],\n", + " [-99.20890331268312, 19.399785623300048],\n", + " [-99.20897841453552, 19.399937418648214],\n", + " [-99.20919299125673, 19.399937418648214],\n", + " [-99.2093861103058, 19.39991717927664],\n", + " [-99.20956850051881, 19.39996777770086],\n", + " [-99.20961141586305, 19.40013981222548],\n", + " [-99.20963287353517, 19.40032196622975],\n", + " [-99.20978307724, 19.4004130431554],\n", + " [-99.20996546745302, 19.40039280384301],\n", + " [-99.21019077301027, 19.400372564528084],\n", + " [-99.21042680740356, 19.40036244486966],\n", + "]\n", + "\n", + "polygon_points = []\n", + "polygon_points_ndjson = []\n", + "\n", + "for sub in coords_polygon:\n", + " polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", + " polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", + "\n", + "# Python Annotation\n", + "polygon_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polygon_geo\",\n", + " value=lb_types.Polygon(points=polygon_points),\n", + ")\n", + "\n", + "# NDJSON\n", + "polygon_annotation_ndjson = {\n", + " \"name\": \"polygon_geo\",\n", + " \"polygon\": polygon_points_ndjson,\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Bounding Box #######\ncoord_object = {\n \"coordinates\": [[\n [-99.20746564865112, 19.39799442829336],\n [-99.20746564865112, 19.39925939999194],\n 
[-99.20568466186523, 19.39925939999194],\n [-99.20568466186523, 19.39799442829336],\n [-99.20746564865112, 19.39799442829336],\n ]]\n}\n\nbbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\nbbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n\n# Python Annotation\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_geo\",\n value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n)\n\n# NDJSON\nbbox_annotation_ndjson = {\n \"name\": \"bbox_geo\",\n \"bbox\": {\n \"top\":\n coord_object[\"coordinates\"][0][1][1],\n \"left\":\n coord_object[\"coordinates\"][0][1][0],\n \"height\":\n coord_object[\"coordinates\"][0][3][1] -\n coord_object[\"coordinates\"][0][1][1],\n \"width\":\n coord_object[\"coordinates\"][0][3][0] -\n coord_object[\"coordinates\"][0][1][0],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Bounding Box #######\n", + "coord_object = {\n", + " \"coordinates\": [[\n", + " [-99.20746564865112, 19.39799442829336],\n", + " [-99.20746564865112, 19.39925939999194],\n", + " [-99.20568466186523, 19.39925939999194],\n", + " [-99.20568466186523, 19.39799442829336],\n", + " [-99.20746564865112, 19.39799442829336],\n", + " ]]\n", + "}\n", + "\n", + "bbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\n", + "bbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n", + "\n", + "# Python Annotation\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_geo\",\n", + " value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n", + ")\n", + "\n", + "# NDJSON\n", + "bbox_annotation_ndjson = {\n", + " \"name\": \"bbox_geo\",\n", + " \"bbox\": {\n", + " \"top\":\n", + " coord_object[\"coordinates\"][0][1][1],\n", + " \"left\":\n", + " coord_object[\"coordinates\"][0][1][0],\n", + " \"height\":\n", + " 
coord_object[\"coordinates\"][0][3][1] -\n", + " coord_object[\"coordinates\"][0][1][1],\n", + " \"width\":\n", + " coord_object[\"coordinates\"][0][3][0] -\n", + " coord_object[\"coordinates\"][0][1][0],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Classification - radio (single choice) #######\n\n# Python Annotation\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question_geo\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": \"radio_question_geo\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Classification - radio (single choice) #######\n", + "\n", + "# Python Annotation\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question_geo\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + ")\n", + "\n", + "# NDJSON\n", + "radio_annotation_ndjson = {\n", + " \"name\": \"radio_question_geo\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Classification - Checklist (multi-choice) #######\n\ncoord_object_checklist = {\n \"coordinates\": [[\n [-99.210266, 19.39540372195134],\n [-99.210266, 19.396901],\n [-99.20621067903966, 19.396901],\n [-99.20621067903966, 19.39540372195134],\n [-99.210266, 19.39540372195134],\n ]]\n}\n\n# Python Annotation\nbbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_checklist_geo\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_name\",\n 
value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n )\n ],\n)\n\n# NDJSON\nbbox_with_checklist_subclass_ndjson = {\n \"name\": \"bbox_checklist_geo\",\n \"classifications\": [{\n \"name\": \"checklist_class_name\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\"\n }],\n }],\n \"bbox\": {\n \"top\":\n coord_object_checklist[\"coordinates\"][0][1][1],\n \"left\":\n coord_object_checklist[\"coordinates\"][0][1][0],\n \"height\":\n coord_object_checklist[\"coordinates\"][0][3][1] -\n coord_object_checklist[\"coordinates\"][0][1][1],\n \"width\":\n coord_object_checklist[\"coordinates\"][0][3][0] -\n coord_object_checklist[\"coordinates\"][0][1][0],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Classification - Checklist (multi-choice) #######\n", + "\n", + "coord_object_checklist = {\n", + " \"coordinates\": [[\n", + " [-99.210266, 19.39540372195134],\n", + " [-99.210266, 19.396901],\n", + " [-99.20621067903966, 19.396901],\n", + " [-99.20621067903966, 19.39540372195134],\n", + " [-99.210266, 19.39540372195134],\n", + " ]]\n", + "}\n", + "\n", + "# Python Annotation\n", + "bbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n", + " name=\"bbox_checklist_geo\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n", + " end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class_name\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "# NDJSON\n", + "bbox_with_checklist_subclass_ndjson = {\n", + " \"name\": \"bbox_checklist_geo\",\n", + " \"classifications\": [{\n", + " \"name\": \"checklist_class_name\",\n", + " 
\"answer\": [{\n", + " \"name\": \"first_checklist_answer\"\n", + " }],\n", + " }],\n", + " \"bbox\": {\n", + " \"top\":\n", + " coord_object_checklist[\"coordinates\"][0][1][1],\n", + " \"left\":\n", + " coord_object_checklist[\"coordinates\"][0][1][0],\n", + " \"height\":\n", + " coord_object_checklist[\"coordinates\"][0][3][1] -\n", + " coord_object_checklist[\"coordinates\"][0][1][1],\n", + " \"width\":\n", + " coord_object_checklist[\"coordinates\"][0][3][0] -\n", + " coord_object_checklist[\"coordinates\"][0][1][0],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Classification free form text with bbox #######\n\ncoord_object_text = {\n \"coordinates\": [[\n [-99.21019613742828, 19.397447957052933],\n [-99.21019613742828, 19.39772119262215],\n [-99.20986354351044, 19.39772119262215],\n [-99.20986354351044, 19.397447957052933],\n [-99.21019613742828, 19.397447957052933],\n ]]\n}\n# Python Annotation\nbbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_text_geo\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.21019613742828,\n y=19.397447957052933), # Top left\n end=lb_types.Point(x=-99.20986354351044,\n y=19.39772119262215), # Bottom right\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"free_text_geo\", value=lb_types.Text(answer=\"sample text\"))\n ],\n)\n\n# NDJSON\nbbox_with_free_text_subclass_ndjson = {\n \"name\": \"bbox_text_geo\",\n \"classifications\": [{\n \"name\": \"free_text_geo\",\n \"answer\": \"sample text\"\n }],\n \"bbox\": {\n \"top\":\n coord_object_text[\"coordinates\"][0][1][1],\n \"left\":\n coord_object_text[\"coordinates\"][0][1][0],\n \"height\":\n coord_object_text[\"coordinates\"][0][3][1] -\n coord_object_text[\"coordinates\"][0][1][1],\n \"width\":\n coord_object_text[\"coordinates\"][0][3][0] -\n coord_object_text[\"coordinates\"][0][1][0],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + 
"source": [ + "####### Classification free form text with bbox #######\n", + "\n", + "coord_object_text = {\n", + " \"coordinates\": [[\n", + " [-99.21019613742828, 19.397447957052933],\n", + " [-99.21019613742828, 19.39772119262215],\n", + " [-99.20986354351044, 19.39772119262215],\n", + " [-99.20986354351044, 19.397447957052933],\n", + " [-99.21019613742828, 19.397447957052933],\n", + " ]]\n", + "}\n", + "# Python Annotation\n", + "bbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n", + " name=\"bbox_text_geo\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=-99.21019613742828,\n", + " y=19.397447957052933), # Top left\n", + " end=lb_types.Point(x=-99.20986354351044,\n", + " y=19.39772119262215), # Bottom right\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"free_text_geo\", value=lb_types.Text(answer=\"sample text\"))\n", + " ],\n", + ")\n", + "\n", + "# NDJSON\n", + "bbox_with_free_text_subclass_ndjson = {\n", + " \"name\": \"bbox_text_geo\",\n", + " \"classifications\": [{\n", + " \"name\": \"free_text_geo\",\n", + " \"answer\": \"sample text\"\n", + " }],\n", + " \"bbox\": {\n", + " \"top\":\n", + " coord_object_text[\"coordinates\"][0][1][1],\n", + " \"left\":\n", + " coord_object_text[\"coordinates\"][0][1][0],\n", + " \"height\":\n", + " coord_object_text[\"coordinates\"][0][3][1] -\n", + " coord_object_text[\"coordinates\"][0][1][1],\n", + " \"width\":\n", + " coord_object_text[\"coordinates\"][0][3][0] -\n", + " coord_object_text[\"coordinates\"][0][1][0],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Classification - Checklist (multi-choice) #######\n\n# Python Annotation\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question_geo\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n 
lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question_geo\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n {\n \"name\": \"third_checklist_answer\"\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Classification - Checklist (multi-choice) #######\n", + "\n", + "# Python Annotation\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question_geo\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"checklist_question_geo\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"third_checklist_answer\"\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classification - Radio and Checklist (with subclassifications) ##########\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n# NDJSON\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": 
\"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "########## Classification - Radio and Checklist (with subclassifications) ##########\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "# NDJSON\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + "}\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " 
name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "nested_checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "top_left_bound = lb_types.Point(x=-99.21052827588443, y=19.400498983095076)\nbottom_right_bound = lb_types.Point(x=-99.20534818927473, y=19.39533555271248)\n\nepsg = lb_types.EPSG.EPSG4326\nbounds = lb_types.TiledBounds(epsg=epsg,\n bounds=[top_left_bound, bottom_right_bound])\nglobal_key = \"mexico_city\"\n\ntile_layer = lb_types.TileLayer(\n url=\n \"https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png\"\n)\n\ntiled_image_data = lb_types.TiledImageData(tile_layer=tile_layer,\n tile_bounds=bounds,\n zoom_levels=[17, 23])\n\nasset = {\n \"row_data\": tiled_image_data.asdict(),\n \"global_key\": global_key,\n \"media_type\": \"TMS_GEO\",\n}\n\ndataset = client.create_dataset(name=\"geo_demo_dataset\")\ntask = dataset.create_data_rows([asset])\nprint(\"Errors:\", task.errors)\nprint(\"Failed data 
rows:\", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "top_left_bound = lb_types.Point(x=-99.21052827588443, y=19.400498983095076)\n", + "bottom_right_bound = lb_types.Point(x=-99.20534818927473, y=19.39533555271248)\n", + "\n", + "epsg = lb_types.EPSG.EPSG4326\n", + "bounds = lb_types.TiledBounds(epsg=epsg,\n", + " bounds=[top_left_bound, bottom_right_bound])\n", + "global_key = \"mexico_city\" + str(uuid.uuid4())\n", + "\n", + "tile_layer = lb_types.TileLayer(\n", + " url=\n", + " \"https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png\"\n", + ")\n", + "\n", + "tiled_image_data = lb_types.TiledImageData(tile_layer=tile_layer,\n", + " tile_bounds=bounds,\n", + " zoom_levels=[17, 23])\n", + "\n", + "asset = {\n", + " \"row_data\": tiled_image_data.asdict(),\n", + " \"global_key\": global_key,\n", + " \"media_type\": \"TMS_GEO\",\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"geo_demo_dataset\")\n", + "task = dataset.create_data_rows([asset])\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows:\", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an ontology\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_geo\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline_geo\"),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo\"),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo_2\"),\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_geo\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_checklist_geo\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class_name\",\n options=[lb.Option(value=\"first_checklist_answer\")],\n ),\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_text_geo\",\n classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text_geo\"),\n ],\n ),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question_geo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n lb.Option(value=\"third_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question_geo\",\n options=[lb.Option(value=\"first_radio_answer\")],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n 
],\n)\n\nontology = client.create_ontology(\n \"Ontology Geospatial Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Geospatial_Tile,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " tools=[\n", + " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_geo\"),\n", + " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline_geo\"),\n", + " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo\"),\n", + " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo_2\"),\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_geo\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_checklist_geo\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_class_name\",\n", + " options=[lb.Option(value=\"first_checklist_answer\")],\n", + " ),\n", + " ],\n", + " ),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_text_geo\",\n", + " classifications=[\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text_geo\"),\n", + " ],\n", + " ),\n", + " ],\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question_geo\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " lb.Option(value=\"third_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question_geo\",\n", + " options=[lb.Option(value=\"first_radio_answer\")],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " 
class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", + " ),\n", + " ],\n", + " ),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Ontology Geospatial Annotations\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Geospatial_Tile,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Create a labeling project\n", "Connect the ontology to the labeling project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"Geospatial Project Demo\",\n media_type=lb.MediaType.Geospatial_Tile)\n\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n", + "# Queue mode will be deprecated once dataset mode is deprecated\n", + "\n", + "project = client.create_project(name=\"Geospatial Project Demo\",\n", + " media_type=lb.MediaType.Geospatial_Tile)\n", + "\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project " - ], - 
"cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Setup Batches and Ontology\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-geo-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Setup Batches and Ontology\n", + "\n", + "# Create a batch to send to your MAL project\n", + "batch = project.create_batch(\n", + " \"first-batch-geo-demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5: Create the annotations payload \n", "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below. \n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotations\n", "Here we create the complete label ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created on ***Supported Python annotation types and NDJSON*** section." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "## Lets create another polygon annotation with python annotation tools that draws the image using cv2 python libraries\n\nhsv = cv2.cvtColor(tiled_image_data.value, cv2.COLOR_RGB2HSV)\nmask = cv2.inRange(hsv, (25, 50, 25), (100, 150, 255))\nkernel = np.ones((15, 20), np.uint8)\nmask = cv2.erode(mask, kernel)\nmask = cv2.dilate(mask, kernel)\nmask_annotation = lb_types.MaskData.from_2D_arr(mask)\nmask_data = lb_types.Mask(mask=mask_annotation, color=[255, 255, 255])\nh, w, _ = tiled_image_data.value.shape\npixel_bounds = lb_types.TiledBounds(\n epsg=lb_types.EPSG.SIMPLEPIXEL,\n bounds=[lb_types.Point(x=0, y=0),\n lb_types.Point(x=w, y=h)],\n)\ntransformer = lb_types.EPSGTransformer.create_pixel_to_geo_transformer(\n src_epsg=pixel_bounds.epsg,\n pixel_bounds=pixel_bounds,\n geo_bounds=tiled_image_data.tile_bounds,\n zoom=20,\n)\npixel_polygons = mask_data.shapely.simplify(3)\nlist_of_polygons = [\n transformer(lb_types.Polygon.from_shapely(p)) for p in pixel_polygons.geoms\n]\npolygon_annotation_two = lb_types.ObjectAnnotation(value=list_of_polygons[0],\n name=\"polygon_geo_2\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "## Lets create another polygon annotation with python annotation tools that draws the image using cv2 python libraries\n", + "\n", + "hsv = cv2.cvtColor(tiled_image_data.value, cv2.COLOR_RGB2HSV)\n", + "mask = cv2.inRange(hsv, (25, 50, 25), (100, 150, 255))\n", + "kernel = np.ones((15, 20), np.uint8)\n", + "mask = cv2.erode(mask, kernel)\n", + "mask = cv2.dilate(mask, kernel)\n", + "mask_annotation = lb_types.MaskData.from_2D_arr(mask)\n", + "mask_data = lb_types.Mask(mask=mask_annotation, color=[255, 255, 255])\n", + "h, w, _ = tiled_image_data.value.shape\n", + "pixel_bounds = lb_types.TiledBounds(\n", + " epsg=lb_types.EPSG.SIMPLEPIXEL,\n", + " bounds=[lb_types.Point(x=0, y=0),\n", + " 
lb_types.Point(x=w, y=h)],\n", + ")\n", + "transformer = lb_types.EPSGTransformer.create_pixel_to_geo_transformer(\n", + " src_epsg=pixel_bounds.epsg,\n", + " pixel_bounds=pixel_bounds,\n", + " geo_bounds=tiled_image_data.tile_bounds,\n", + " zoom=20,\n", + ")\n", + "pixel_polygons = mask_data.shapely.simplify(3)\n", + "list_of_polygons = [\n", + " transformer(lb_types.Polygon.from_shapely(p)) for p in pixel_polygons.geoms\n", + "]\n", + "polygon_annotation_two = lb_types.ObjectAnnotation(value=list_of_polygons[0],\n", + " name=\"polygon_geo_2\")" + ] }, { - "metadata": {}, - "source": "labels = []\nlabels.append(\n lb_types.Label(\n data={\n \"global_key\": global_key,\n \"tile_layer\": tile_layer,\n \"tile_bounds\": bounds,\n \"zoom_levels\": [12, 20],\n },\n annotations=[\n point_annotation,\n polyline_annotation,\n polygon_annotation,\n bbox_annotation,\n radio_annotation,\n bbox_with_checklist_subclass,\n bbox_with_free_text_subclass,\n checklist_annotation,\n polygon_annotation_two,\n nested_checklist_annotation,\n nested_radio_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "labels = []\n", + "labels.append(\n", + " lb_types.Label(\n", + " data={\n", + " \"global_key\": global_key,\n", + " \"tile_layer\": tile_layer,\n", + " \"tile_bounds\": bounds,\n", + " \"zoom_levels\": [12, 20],\n", + " },\n", + " annotations=[\n", + " point_annotation,\n", + " polyline_annotation,\n", + " polygon_annotation,\n", + " bbox_annotation,\n", + " radio_annotation,\n", + " bbox_with_checklist_subclass,\n", + " bbox_with_free_text_subclass,\n", + " checklist_annotation,\n", + " polygon_annotation_two,\n", + " nested_checklist_annotation,\n", + " nested_radio_annotation,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "### NDJSON annotations\n", "Here we create the complete label NDJSON payload of annotations only using NDJSON 
format. There is one annotation for each reference to an annotation that we created on *** Supported Python annotation types and NDJSON *** section." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\n\nfor annotations in [\n point_annotation_ndjson,\n polyline_annotation_ndjson,\n polygon_annotation_ndjson,\n bbox_annotation_ndjson,\n radio_annotation_ndjson,\n bbox_with_checklist_subclass_ndjson,\n bbox_with_free_text_subclass_ndjson,\n checklist_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n nested_radio_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label_ndjson = []\n", + "\n", + "for annotations in [\n", + " point_annotation_ndjson,\n", + " polyline_annotation_ndjson,\n", + " polygon_annotation_ndjson,\n", + " bbox_annotation_ndjson,\n", + " radio_annotation_ndjson,\n", + " bbox_with_checklist_subclass_ndjson,\n", + " bbox_with_free_text_subclass_ndjson,\n", + " checklist_annotation_ndjson,\n", + " nested_checklist_annotation_ndjson,\n", + " nested_radio_annotation_ndjson,\n", + "]:\n", + " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_ndjson.append(annotations)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or complete labels\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Model-Assisted Labeling (MAL)\n", "For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload MAL label for this data row in project\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"mal_import_job\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload MAL label for this data row in project\n", + "upload_job = lb.MALPredictionImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"mal_import_job\" + str(uuid.uuid4()),\n", + " predictions=labels,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + "print(\"Status of uploads: \", upload_job.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload label for this data row in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_geo_import_job\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload label for this data row in project\n", + "upload_job = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"label_geo_import_job\" + str(uuid.uuid4()),\n", + " labels=labels,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n", + 
"print(\"Status of uploads: \", upload_job.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/annotation_import/video.ipynb b/examples/annotation_import/video.ipynb index 726e22738..0546e84be 100644 --- a/examples/annotation_import/video.ipynb +++ b/examples/annotation_import/video.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Video Annotation Import\n", @@ -44,221 +42,953 @@ " * Polygons \n", "\n", "Please note that this list of unsupported annotations only refers to limitations for importing annotations. For example, when using the Labelbox editor, segmentation masks can be created and edited on video assets." 
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nfrom PIL import Image\nimport requests\nimport base64\nimport labelbox as lb\nimport labelbox.types as lb_types\nfrom io import BytesIO\nimport pprint\n\npp = pprint.PrettyPrinter(indent=4)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "from PIL import Image\n", + "import requests\n", + "import base64\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "from io import BytesIO\n", + "import pprint\n", + "\n", + "pp = pprint.PrettyPrinter(indent=4)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for video\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box: (frame-based)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Confidence scores are not supported for frame specific bounding box annotations and VideoObjectAnnotation class\n\n# bbox dimensions\nbbox_dm = {\"top\": 
617, \"left\": 1371, \"height\": 419, \"width\": 505}\n\n# Python Annotation\nbbox_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"],\n y=bbox_dm[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=19,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n]\n\n# NDJSON\nbbox_annotation_ndjson = {\n \"name\":\n \"bbox_video\",\n \"segments\": [{\n \"keyframes\": [\n {\n \"frame\": 13,\n \"bbox\": bbox_dm\n },\n {\n \"frame\": 19,\n \"bbox\": bbox_dm\n },\n ]\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Confidence scores are not supported for frame specific bounding box annotations and VideoObjectAnnotation class\n", + "\n", + "# bbox dimensions\n", + "bbox_dm = {\"top\": 617, \"left\": 1371, \"height\": 419, \"width\": 505}\n", + "\n", + "# Python Annotation\n", + "bbox_annotation = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_video\",\n", + " keyframe=True,\n", + " frame=13,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm[\"left\"],\n", + " y=bbox_dm[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", + " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " ),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_video\",\n", + " keyframe=True,\n", + 
" frame=19,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", + " end=lb_types.Point(\n", + " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", + " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", + " ),\n", + " ),\n", + " ),\n", + "]\n", + "\n", + "# NDJSON\n", + "bbox_annotation_ndjson = {\n", + " \"name\":\n", + " \"bbox_video\",\n", + " \"segments\": [{\n", + " \"keyframes\": [\n", + " {\n", + " \"frame\": 13,\n", + " \"bbox\": bbox_dm\n", + " },\n", + " {\n", + " \"frame\": 19,\n", + " \"bbox\": bbox_dm\n", + " },\n", + " ]\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Point (frame-based)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Annotation\npoint_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"point_video\",\n keyframe=True,\n frame=17,\n value=lb_types.Point(x=660.134, y=407.926),\n )\n]\n\n# NDJSON\npoint_annotation_ndjson = {\n \"name\":\n \"point_video\",\n \"segments\": [{\n \"keyframes\": [{\n \"frame\": 17,\n \"point\": {\n \"x\": 660.134,\n \"y\": 407.926\n }\n }]\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Annotation\n", + "point_annotation = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"point_video\",\n", + " keyframe=True,\n", + " frame=17,\n", + " value=lb_types.Point(x=660.134, y=407.926),\n", + " )\n", + "]\n", + "\n", + "# NDJSON\n", + "point_annotation_ndjson = {\n", + " \"name\":\n", + " \"point_video\",\n", + " \"segments\": [{\n", + " \"keyframes\": [{\n", + " \"frame\": 17,\n", + " \"point\": {\n", + " \"x\": 660.134,\n", + " \"y\": 407.926\n", + " }\n", + " }]\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Polyline (frame-based)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "######## Polyline 
########\n\n# Python Annotation\npolyline_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=5,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=20,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=24,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=45,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n]\n\n# NDJSON\npolyline_frame_annotation_ndjson = {\n \"name\":\n \"line_video_frame\",\n \"segments\": [\n {\n \"keyframes\": [\n {\n \"frame\":\n 5,\n \"line\": [\n {\n \"x\": 680,\n \"y\": 100\n },\n {\n \"x\": 100,\n \"y\": 190\n },\n {\n \"x\": 190,\n \"y\": 220\n },\n ],\n },\n {\n \"frame\":\n 20,\n \"line\": [\n {\n \"x\": 680,\n \"y\": 180\n },\n {\n \"x\": 100,\n \"y\": 200\n },\n {\n \"x\": 200,\n \"y\": 260\n },\n ],\n },\n ]\n },\n {\n \"keyframes\": [\n {\n \"frame\": 24,\n \"line\": [{\n \"x\": 300,\n \"y\": 310\n }, {\n \"x\": 330,\n \"y\": 430\n }],\n },\n {\n \"frame\": 45,\n \"line\": [{\n \"x\": 600,\n \"y\": 810\n }, {\n \"x\": 900,\n \"y\": 930\n }],\n },\n ]\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######## Polyline ########\n", + "\n", + "# Python Annotation\n", + "polyline_annotation = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=5,\n", + " segment_index=0,\n", + " 
value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=20,\n", + " segment_index=0,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=24,\n", + " segment_index=1,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=45,\n", + " segment_index=1,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + "]\n", + "\n", + "# NDJSON\n", + "polyline_frame_annotation_ndjson = {\n", + " \"name\":\n", + " \"line_video_frame\",\n", + " \"segments\": [\n", + " {\n", + " \"keyframes\": [\n", + " {\n", + " \"frame\":\n", + " 5,\n", + " \"line\": [\n", + " {\n", + " \"x\": 680,\n", + " \"y\": 100\n", + " },\n", + " {\n", + " \"x\": 100,\n", + " \"y\": 190\n", + " },\n", + " {\n", + " \"x\": 190,\n", + " \"y\": 220\n", + " },\n", + " ],\n", + " },\n", + " {\n", + " \"frame\":\n", + " 20,\n", + " \"line\": [\n", + " {\n", + " \"x\": 680,\n", + " \"y\": 180\n", + " },\n", + " {\n", + " \"x\": 100,\n", + " \"y\": 200\n", + " },\n", + " {\n", + " \"x\": 200,\n", + " \"y\": 260\n", + " },\n", + " ],\n", + " },\n", + " ]\n", + " },\n", + " {\n", + " \"keyframes\": [\n", + " {\n", + " \"frame\": 24,\n", + " \"line\": [{\n", + " \"x\": 300,\n", + " \"y\": 310\n", + " }, {\n", + " \"x\": 330,\n", + " \"y\": 430\n", + " }],\n", + " },\n", + " {\n", + " \"frame\": 45,\n", + " \"line\": [{\n", + " \"x\": 600,\n", + " \"y\": 810\n", + " }, {\n", + " \"x\": 900,\n", + " \"y\": 
930\n", + " }],\n", + " },\n", + " ]\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Radio and checklist (frame-based)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Annotation\nradio_annotation = [\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=9,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=15,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n ),\n]\n\n## NDJSON\nframe_radio_classification_ndjson = {\n \"name\": \"radio_class\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"frames\": [{\n \"start\": 9,\n \"end\": 15\n }],\n },\n}\n\n# Python annotation\nchecklist_annotation = [\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=29,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=35,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n ),\n]\n\n## NDJSON\nframe_checklist_classification_ndjson = {\n \"name\":\n \"checklist_class\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"frames\": [{\n \"start\": 29,\n \"end\": 35\n }],\n },\n {\n \"name\": \"second_checklist_answer\",\n \"frames\": [{\n \"start\": 29,\n \"end\": 35\n }],\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Annotation\n", + "radio_annotation = 
[\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"radio_class\",\n", + " frame=9,\n", + " segment_index=0,\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + " ),\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"radio_class\",\n", + " frame=15,\n", + " segment_index=0,\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + " ),\n", + "]\n", + "\n", + "## NDJSON\n", + "frame_radio_classification_ndjson = {\n", + " \"name\": \"radio_class\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"frames\": [{\n", + " \"start\": 9,\n", + " \"end\": 15\n", + " }],\n", + " },\n", + "}\n", + "\n", + "# Python annotation\n", + "checklist_annotation = [\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " frame=29,\n", + " segment_index=0,\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + " ),\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " frame=35,\n", + " segment_index=0,\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + " ),\n", + "]\n", + "\n", + "## NDJSON\n", + "frame_checklist_classification_ndjson = {\n", + " \"name\":\n", + " \"checklist_class\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\",\n", + " \"frames\": [{\n", + " \"start\": 29,\n", + " \"end\": 35\n", + " }],\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\",\n", + " \"frames\": [{\n", + " \"start\": 29,\n", + " \"end\": 35\n", + " }],\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", 
"metadata": {}, "source": [ "### Classification: Checklist and radio (global)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "##### Global Classifications #######\n\n# Python Annotation\n## For global classifications use ClassificationAnnotation\nglobal_radio_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"radio_class_global\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n )\n]\n\n# NDJSON\nglobal_radio_classification_ndjson = {\n \"name\": \"radio_class_global\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}\n\n# Python annotation\nglobal_checklist_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_global\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n )\n]\n\n# NDJSON\nglobal_checklist_classification_ndjson = {\n \"name\":\n \"checklist_class_global\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "##### Global Classifications #######\n", + "\n", + "# Python Annotation\n", + "## For global classifications use ClassificationAnnotation\n", + "global_radio_annotation = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"radio_class_global\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + " )\n", + "]\n", + "\n", + "# NDJSON\n", + "global_radio_classification_ndjson = {\n", + " \"name\": \"radio_class_global\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + "}\n", + "\n", + "# Python annotation\n", + "global_checklist_annotation = [\n", + " lb_types.ClassificationAnnotation(\n", + " 
name=\"checklist_class_global\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + " )\n", + "]\n", + "\n", + "# NDJSON\n", + "global_checklist_classification_ndjson = {\n", + " \"name\":\n", + " \"checklist_class_global\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\"\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Nested radio and checklist (global)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########## Nested Global Classification ###########\n\n# Python Annotation\nnested_radio_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n )\n]\n\n# NDJSON\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}\n\n# Python Annotation\nnested_checklist_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n )\n]\n\n# 
NDJSON\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Nested Global Classification ###########\n", + "\n", + "# Python Annotation\n", + "nested_radio_annotation = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + "]\n", + "\n", + "# NDJSON\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + "}\n", + "\n", + "# Python Annotation\n", + "nested_checklist_annotation = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + " )\n", + "]\n", + "\n", + "# NDJSON\n", + 
"nested_checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Free-form text" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "######### Free text classification ###########\ntext_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\"),\n )\n]\n\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### Free text classification ###########\n", + "text_annotation = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", # must match your ontology feature\"s name\n", + " value=lb_types.Text(answer=\"sample text\"),\n", + " )\n", + "]\n", + "\n", + "text_annotation_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box with sub-classifications (frame-based)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Confidence scores are not supported for frame specific bounding box annotations with sub-classifications\n\n# bounding box dimensions\nbbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\n\n# Python Annotation\nframe_bbox_with_checklist_subclass_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=10,\n segment_index=0,\n value=lb_types.Rectangle(\n 
start=lb_types.Point(x=bbox_dm2[\"left\"],\n y=bbox_dm2[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=11,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n )\n ],\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"second_checklist_answer\")\n ]),\n )\n ],\n ),\n]\n\nframe_bbox_with_checklist_subclass_annotation_ndjson = {\n \"name\":\n \"bbox_class\",\n \"segments\": [{\n \"keyframes\": [\n {\n \"frame\": 10,\n \"bbox\": bbox_dm2\n },\n {\n \"frame\":\n 11,\n \"bbox\":\n bbox_dm2,\n \"classifications\": [{\n \"name\": \"checklist_class\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\"\n }],\n }],\n },\n {\n \"frame\":\n 13,\n \"bbox\":\n bbox_dm2,\n \"classifications\": [{\n \"name\": \"checklist_class\",\n \"answer\": [{\n \"name\": \"second_checklist_answer\"\n }],\n }],\n },\n ]\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# 
Confidence scores are not supported for frame specific bounding box annotations with sub-classifications\n", + "\n", + "# bounding box dimensions\n", + "bbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\n", + "\n", + "# Python Annotation\n", + "frame_bbox_with_checklist_subclass_annotation = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_class\",\n", + " keyframe=True,\n", + " frame=10,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm2[\"left\"],\n", + " y=bbox_dm2[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", + " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " ),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_class\",\n", + " keyframe=True,\n", + " frame=11,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", + " end=lb_types.Point(\n", + " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", + " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", + " ),\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_class\",\n", + " keyframe=True,\n", + " frame=13,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", + " end=lb_types.Point(\n", + " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", + " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", + " ),\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " value=lb_types.Checklist(answer=[\n", 
+ " lb_types.ClassificationAnswer(\n", + " name=\"second_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " ),\n", + "]\n", + "\n", + "frame_bbox_with_checklist_subclass_annotation_ndjson = {\n", + " \"name\":\n", + " \"bbox_class\",\n", + " \"segments\": [{\n", + " \"keyframes\": [\n", + " {\n", + " \"frame\": 10,\n", + " \"bbox\": bbox_dm2\n", + " },\n", + " {\n", + " \"frame\":\n", + " 11,\n", + " \"bbox\":\n", + " bbox_dm2,\n", + " \"classifications\": [{\n", + " \"name\": \"checklist_class\",\n", + " \"answer\": [{\n", + " \"name\": \"first_checklist_answer\"\n", + " }],\n", + " }],\n", + " },\n", + " {\n", + " \"frame\":\n", + " 13,\n", + " \"bbox\":\n", + " bbox_dm2,\n", + " \"classifications\": [{\n", + " \"name\": \"checklist_class\",\n", + " \"answer\": [{\n", + " \"name\": \"second_checklist_answer\"\n", + " }],\n", + " }],\n", + " },\n", + " ]\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Masks (frame-based)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "def extract_rgb_colors_from_url(image_url):\n response = requests.get(image_url)\n img = Image.open(BytesIO(response.content))\n\n colors = set()\n for x in range(img.width):\n for y in range(img.height):\n pixel = img.getpixel((x, y))\n if pixel[:3] != (0, 0, 0):\n colors.add(pixel[:3]) # Get only the RGB values\n\n return colors", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "def extract_rgb_colors_from_url(image_url):\n", + " response = requests.get(image_url)\n", + " img = Image.open(BytesIO(response.content))\n", + "\n", + " colors = set()\n", + " for x in range(img.width):\n", + " for y in range(img.height):\n", + " pixel = img.getpixel((x, y))\n", + " if pixel[:3] != (0, 0, 0):\n", + " colors.add(pixel[:3]) # Get only the RGB values\n", + "\n", + " return colors" + ] }, { - "metadata": {}, - "source": "### Raster Segmentation (Byte string 
array)\n## For this example we are going to to pass all the annotations payload in a single VideoMaskAnnotation\n\n# Single mask\nurl = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_24_composite_mask.png\"\nresponse = requests.get(url)\nimg_bytes = base64.b64encode(response.content).decode(\"utf-8\")\n\n# We are generating our frames and instances in this step, and will later add them to the VideoMaskAnnotation that will contain\n# all frames and instances\nframes_mask_single = [\n lb_types.MaskFrame(\n index=20,\n im_bytes=response.\n content, # Instead of bytes you could also pass an instance URI : instance_uri=url\n )\n]\ninstances_mask_single = [\n lb_types.MaskInstance(color_rgb=(76, 104, 177), name=\"video_mask\")\n]\n\n## Add multiple masks using multiple tools in different frames - Note that only once composite mask can exist per frame\nframes_cp_mask_url = [\n {\n \"1\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_1_composite_mask.png\"\n },\n {\n \"24\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_24_composite_mask.png\"\n },\n {\n \"26\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_26_composite_mask.png\"\n },\n]\n\nrgb_mask_tool = [(227, 135, 126), (169, 248, 152), (83, 152, 103)]\ncp_masks = []\nunique_colors = set()\n\nlb_frames = []\nlb_instances = []\ncounter = 0\n\nfor d in frames_cp_mask_url:\n for frame_no, v in d.items():\n response = requests.get(v)\n colors = extract_rgb_colors_from_url(v)\n for color in colors:\n if not color in unique_colors:\n unique_colors.add(color)\n name = (\"video_mask\" if color in rgb_mask_tool else\n \"mask_with_text_subclass\")\n lb_instances.append(\n lb_types.MaskInstance(color_rgb=color, name=name))\n counter += 1\n lb_frames.append(\n lb_types.MaskFrame(index=frame_no, im_bytes=response.content))\ncp_masks.append(\n lb_types.VideoMaskAnnotation(\n frames=lb_frames + 
frames_mask_single,\n instances=lb_instances + instances_mask_single,\n ))\n\npp.pprint(lb_frames)\npp.pprint(cp_masks)\n\n# NDJSON - single tool\nvideo_mask_ndjson_bytes_2 = {\n \"masks\": {\n \"frames\": [\n {\n \"index\": 31,\n \"imBytes\": img_bytes,\n },\n {\n \"index\": 34,\n \"imBytes\": img_bytes,\n },\n ],\n \"instances\": [{\n \"colorRGB\": [76, 104, 177],\n \"name\": \"video_mask\"\n }],\n }\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "### Raster Segmentation (Byte string array)\n", + "## For this example we are going to to pass all the annotations payload in a single VideoMaskAnnotation\n", + "\n", + "# Single mask\n", + "url = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_24_composite_mask.png\"\n", + "response = requests.get(url)\n", + "img_bytes = base64.b64encode(response.content).decode(\"utf-8\")\n", + "\n", + "# We are generating our frames and instances in this step, and will later add them to the VideoMaskAnnotation that will contain\n", + "# all frames and instances\n", + "frames_mask_single = [\n", + " lb_types.MaskFrame(\n", + " index=20,\n", + " im_bytes=response.\n", + " content, # Instead of bytes you could also pass an instance URI : instance_uri=url\n", + " )\n", + "]\n", + "instances_mask_single = [\n", + " lb_types.MaskInstance(color_rgb=(76, 104, 177), name=\"video_mask\")\n", + "]\n", + "\n", + "## Add multiple masks using multiple tools in different frames - Note that only once composite mask can exist per frame\n", + "frames_cp_mask_url = [\n", + " {\n", + " \"1\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_1_composite_mask.png\"\n", + " },\n", + " {\n", + " \"24\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_24_composite_mask.png\"\n", + " },\n", + " {\n", + " \"26\":\n", + " 
\"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_26_composite_mask.png\"\n", + " },\n", + "]\n", + "\n", + "rgb_mask_tool = [(227, 135, 126), (169, 248, 152), (83, 152, 103)]\n", + "cp_masks = []\n", + "unique_colors = set()\n", + "\n", + "lb_frames = []\n", + "lb_instances = []\n", + "counter = 0\n", + "\n", + "for d in frames_cp_mask_url:\n", + " for frame_no, v in d.items():\n", + " response = requests.get(v)\n", + " colors = extract_rgb_colors_from_url(v)\n", + " for color in colors:\n", + " if not color in unique_colors:\n", + " unique_colors.add(color)\n", + " name = (\"video_mask\" if color in rgb_mask_tool else\n", + " \"mask_with_text_subclass\")\n", + " lb_instances.append(\n", + " lb_types.MaskInstance(color_rgb=color, name=name))\n", + " counter += 1\n", + " lb_frames.append(\n", + " lb_types.MaskFrame(index=frame_no, im_bytes=response.content))\n", + "cp_masks.append(\n", + " lb_types.VideoMaskAnnotation(\n", + " frames=lb_frames + frames_mask_single,\n", + " instances=lb_instances + instances_mask_single,\n", + " ))\n", + "\n", + "pp.pprint(lb_frames)\n", + "pp.pprint(cp_masks)\n", + "\n", + "# NDJSON - single tool\n", + "video_mask_ndjson_bytes_2 = {\n", + " \"masks\": {\n", + " \"frames\": [\n", + " {\n", + " \"index\": 31,\n", + " \"imBytes\": img_bytes,\n", + " },\n", + " {\n", + " \"index\": 34,\n", + " \"imBytes\": img_bytes,\n", + " },\n", + " ],\n", + " \"instances\": [{\n", + " \"colorRGB\": [76, 104, 177],\n", + " \"name\": \"video_mask\"\n", + " }],\n", + " }\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Multiple instances of bounding box annotations in the same frame" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Fist instance of bounding box ranging from frame 22 to 27\nbbox_annotation_1 = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=22,\n segment_index=0,\n value=lb_types.Rectangle(\n 
start=lb_types.Point(x=bbox_dm[\"left\"],\n y=bbox_dm[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=27,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n]\n# NDJSON example:\nbbox_frame_annotation_ndjson = {\n \"name\":\n \"bbox_video\",\n \"segments\": [{\n \"keyframes\": [\n {\n \"frame\": 22,\n \"bbox\": bbox_dm\n },\n {\n \"frame\": 27,\n \"bbox\": bbox_dm2\n },\n ]\n }],\n}\n\n# Second instance of bounding box ranging from frame 22 to 27\nbbox_annotation_2 = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=22,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=27,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n ),\n]\n# NDJSON\nbbox_frame_annotation_ndjson2 = {\n \"name\":\n \"bbox_video\",\n \"segments\": [{\n \"keyframes\": [\n {\n \"frame\": 22,\n \"bbox\": bbox_dm\n },\n {\n \"frame\": 27,\n \"bbox\": bbox_dm2\n },\n ]\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Fist instance of bounding box ranging from frame 22 to 27\n", + "bbox_annotation_1 = [\n", + " 
lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_video\",\n", + " keyframe=True,\n", + " frame=22,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm[\"left\"],\n", + " y=bbox_dm[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", + " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " ),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_video\",\n", + " keyframe=True,\n", + " frame=27,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", + " end=lb_types.Point(\n", + " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", + " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", + " ),\n", + " ),\n", + " ),\n", + "]\n", + "# NDJSON example:\n", + "bbox_frame_annotation_ndjson = {\n", + " \"name\":\n", + " \"bbox_video\",\n", + " \"segments\": [{\n", + " \"keyframes\": [\n", + " {\n", + " \"frame\": 22,\n", + " \"bbox\": bbox_dm\n", + " },\n", + " {\n", + " \"frame\": 27,\n", + " \"bbox\": bbox_dm2\n", + " },\n", + " ]\n", + " }],\n", + "}\n", + "\n", + "# Second instance of bounding box ranging from frame 22 to 27\n", + "bbox_annotation_2 = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_video\",\n", + " keyframe=True,\n", + " frame=22,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", + " end=lb_types.Point(\n", + " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", + " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", + " ),\n", + " ),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_video\",\n", + " keyframe=True,\n", + " frame=27,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", + " end=lb_types.Point(\n", + " 
x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", + " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", + " ),\n", + " ),\n", + " ),\n", + "]\n", + "# NDJSON\n", + "bbox_frame_annotation_ndjson2 = {\n", + " \"name\":\n", + " \"bbox_video\",\n", + " \"segments\": [{\n", + " \"keyframes\": [\n", + " {\n", + " \"frame\": 22,\n", + " \"bbox\": bbox_dm\n", + " },\n", + " {\n", + " \"frame\": 27,\n", + " \"bbox\": bbox_dm2\n", + " },\n", + " ]\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## End-to-end example: Import pre-labels or ground truth" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "global_key = \"sample-video-jellyfish.mp4\"\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\",\n \"global_key\":\n global_key,\n \"media_type\":\n \"VIDEO\",\n}\n\ndataset = client.create_dataset(\n name=\"video_demo_dataset\",\n iam_integration=\n None, # If this argument is removed, labelbox will use the default integration for your organization.\n)\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "global_key = \"sample-video-jellyfish.mp4\" + str(uuid.uuid4())\n", + "asset = {\n", + " \"row_data\":\n", + " 
\"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\",\n", + " \"global_key\":\n", + " global_key,\n", + " \"media_type\":\n", + " \"VIDEO\",\n", + "}\n", + "\n", + "dataset = client.create_dataset(\n", + " name=\"video_demo_dataset\",\n", + " iam_integration=\n", + " None, # If this argument is removed, labelbox will use the default integration for your organization.\n", + ")\n", + "task = dataset.create_data_rows([asset])\n", + "task.wait_till_done()\n", + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: {task.errors}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an ontology\n", @@ -268,140 +998,347 @@ "\n", "\n", "[Documentation for reference ](https://docs.labelbox.com/reference/import-text-annotations)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_video\"),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_video\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_video_frame\"),\n lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"video_mask\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_class\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"checklist_class\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.RASTER_SEGMENTATION,\n name=\"mask_with_text_subclass\",\n classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"sub_free_text\")\n ],\n ),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class\",\n scope=lb.Classification.Scope.\n INDEX, ## Need to defined scope for frame classifications\n options=[\n 
lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_class\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_class_global\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class_global\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Video Annotation Import Demo Ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Video,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " tools=[\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_video\"),\n", + " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_video\"),\n", + 
" lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_video_frame\"),\n", + " lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"video_mask\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_class\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"checklist_class\",\n", + " scope=lb.Classification.Scope.INDEX,\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.RASTER_SEGMENTATION,\n", + " name=\"mask_with_text_subclass\",\n", + " classifications=[\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"sub_free_text\")\n", + " ],\n", + " ),\n", + " ],\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_class\",\n", + " scope=lb.Classification.Scope.\n", + " INDEX, ## Need to defined scope for frame classifications\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_class\",\n", + " scope=lb.Classification.Scope.INDEX,\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " 
class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_class_global\",\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_class_global\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\"),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Video Annotation Import Demo Ontology\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Video,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Create a labeling project \n", "Connect the ontology to the labeling project." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project = client.create_project(name=\"Video Annotation Import Demo\",\n media_type=lb.MediaType.Video)\n\n## connect ontology to your project\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project = client.create_project(name=\"Video Annotation Import Demo\",\n", + " media_type=lb.MediaType.Video)\n", + "\n", + "## connect ontology to your project\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "batch = project.create_batch(\n \"first-batch-video-demo2\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # A paginated collection of data row objects, a list of data rows or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "batch = project.create_batch(\n", + " \"first-batch-video-demo2\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # A paginated collection of data row objects, a list of data rows or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "print(\"Batch: \", batch)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5: Create the annotations payload \n", "Create the annotations payload using the snippets of code above.\n", "\n", "Labelbox supports two formats for the annotations payload: NDJSON and Python Annotation types." 
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Python Annotation Types" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label = []\nannotations_list = [\n checklist_annotation,\n radio_annotation,\n bbox_annotation,\n frame_bbox_with_checklist_subclass_annotation,\n bbox_annotation_1,\n bbox_annotation_2,\n point_annotation,\n polyline_annotation,\n global_checklist_annotation,\n global_radio_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n text_annotation,\n cp_masks,\n]\n\nfor annotation in annotations_list:\n label.append(\n lb_types.Label(data={\"global_key\": global_key}, annotations=annotation))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label = []\n", + "annotations_list = [\n", + " checklist_annotation,\n", + " radio_annotation,\n", + " bbox_annotation,\n", + " frame_bbox_with_checklist_subclass_annotation,\n", + " bbox_annotation_1,\n", + " bbox_annotation_2,\n", + " point_annotation,\n", + " polyline_annotation,\n", + " global_checklist_annotation,\n", + " global_radio_annotation,\n", + " nested_checklist_annotation,\n", + " nested_radio_annotation,\n", + " text_annotation,\n", + " cp_masks,\n", + "]\n", + "\n", + "for annotation in annotations_list:\n", + " label.append(\n", + " lb_types.Label(data={\"global_key\": global_key}, annotations=annotation))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### NDJSON annotations\n", "Here we create the complete `label_ndjson` payload of annotations. There is one annotation for each *reference to an annotation* that we created above." 
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "First, let\"s update the bbox with nested classifications with the corresponding featureSchemaId" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\n\nannotations_list_ndjson = [\n point_annotation_ndjson,\n bbox_annotation_ndjson,\n polyline_frame_annotation_ndjson,\n frame_checklist_classification_ndjson,\n frame_radio_classification_ndjson,\n nested_radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n frame_bbox_with_checklist_subclass_annotation_ndjson,\n global_radio_classification_ndjson,\n global_checklist_classification_ndjson,\n text_annotation_ndjson,\n bbox_frame_annotation_ndjson,\n bbox_frame_annotation_ndjson2,\n video_mask_ndjson_bytes_2,\n]\n\nfor annotation in annotations_list_ndjson:\n annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotation)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_ndjson = []\n", + "\n", + "annotations_list_ndjson = [\n", + " point_annotation_ndjson,\n", + " bbox_annotation_ndjson,\n", + " polyline_frame_annotation_ndjson,\n", + " frame_checklist_classification_ndjson,\n", + " frame_radio_classification_ndjson,\n", + " nested_radio_annotation_ndjson,\n", + " nested_checklist_annotation_ndjson,\n", + " frame_bbox_with_checklist_subclass_annotation_ndjson,\n", + " global_radio_classification_ndjson,\n", + " global_checklist_classification_ndjson,\n", + " text_annotation_ndjson,\n", + " bbox_frame_annotation_ndjson,\n", + " bbox_frame_annotation_ndjson2,\n", + " video_mask_ndjson_bytes_2,\n", + "]\n", + "\n", + "for annotation in annotations_list_ndjson:\n", + " annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_ndjson.append(annotation)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Upload annotations to 
a project as pre-labels or completed labels\n", "For the purpose of this tutorial only run one of the label imports at once, otherwise the previous import might get overwritten." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Model-Assisted Labeling (MAL)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload MAL label for this data row in project\nupload_job_mal = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"mal_import_job-\" + str(uuid.uuid4()),\n predictions=label,\n)\n\nupload_job_mal.wait_until_done()\nprint(\"Errors:\", upload_job_mal.errors)\nprint(\"Status of uploads: \", upload_job_mal.statuses)\nprint(\" \")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload MAL label for this data row in project\n", + "upload_job_mal = lb.MALPredictionImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"mal_import_job-\" + str(uuid.uuid4()),\n", + " predictions=label,\n", + ")\n", + "\n", + "upload_job_mal.wait_until_done()\n", + "print(\"Errors:\", upload_job_mal.errors)\n", + "print(\"Status of uploads: \", upload_job_mal.statuses)\n", + "print(\" \")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# For this demo either run MAL or Ground truth import, not both.\n\n# upload_job_label_import = lb.LabelImport.create_from_objects(\n# client = client,\n# project_id = project.uid,\n# name = \"label_import_job-\" + str(uuid.uuid4()),\n# labels=label\n# )\n\n# upload_job_label_import.wait_until_done()\n# print(\"Errors:\", upload_job_label_import.errors)\n# print(\"Status of uploads: \", upload_job_label_import.statuses)\n# print(\" \")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": 
[], - "execution_count": null + "source": [ + "# For this demo either run MAL or Ground truth import, not both.\n", + "\n", + "# upload_job_label_import = lb.LabelImport.create_from_objects(\n", + "# client = client,\n", + "# project_id = project.uid,\n", + "# name = \"label_import_job-\" + str(uuid.uuid4()),\n", + "# labels=label\n", + "# )\n", + "\n", + "# upload_job_label_import.wait_until_done()\n", + "# print(\"Errors:\", upload_job_label_import.errors)\n", + "# print(\"Status of uploads: \", upload_job_label_import.statuses)\n", + "# print(\" \")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Delete Project\n# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Delete Project\n", + "# project.delete()\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/basics/data_row_metadata.ipynb b/examples/basics/data_row_metadata.ipynb index fa5970ce2..6e0df0157 100644 --- a/examples/basics/data_row_metadata.ipynb +++ b/examples/basics/data_row_metadata.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,19 +22,19 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Data Row Metadata\n", "\n", "Metadata is useful to better understand data on the platform to help with labeling review, model diagnostics, and data selection. This **should not be confused with attachments**. 
Attachments provide additional context for labelers but is not searchable within Catalog." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Metadata ontology\n", @@ -59,87 +57,115 @@ "\n", "* **Embedding**: 128 float 32 vector used for similarity. To upload custom embeddings use the following [tutorial](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/custom_embeddings.ipynb)\n", "* Any metadata kind can be customized" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nfrom datetime import datetime\nfrom pprint import pprint\nfrom labelbox.schema.data_row_metadata import DataRowMetadataKind\nfrom uuid import uuid4", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "from datetime import datetime\n", + "from pprint import pprint\n", + "from labelbox.schema.data_row_metadata import DataRowMetadataKind\n", + "from uuid import uuid4" + ] }, { - "metadata": {}, - "source": "# Add your API key\nAPI_KEY = \"\"\n# To get your API key go to: Workspace settings -> API -> Create API Key\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your API key\n", + "API_KEY = \"\"\n", + "# To get your API key go to: Workspace settings -> API -> Create API Key\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Get the current metadata 
ontology " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "mdo = client.get_data_row_metadata_ontology()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "mdo = client.get_data_row_metadata_ontology()" + ] }, { - "metadata": {}, - "source": "# list all your metadata ontology as a dictionary accessable by id\nmetadata_ontologies = mdo.fields_by_id\npprint(metadata_ontologies, indent=2)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# list all your metadata ontology as a dictionary accessable by id\n", + "metadata_ontologies = mdo.fields_by_id\n", + "pprint(metadata_ontologies, indent=2)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Access metadata by name" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "split_field = mdo.reserved_by_name[\"split\"]\nsplit_field", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "split_field = mdo.reserved_by_name[\"split\"]\n", + "split_field" + ] }, { - "metadata": {}, - "source": "tag_field = mdo.reserved_by_name[\"tag\"]\ntag_field", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "tag_field = mdo.reserved_by_name[\"tag\"]\n", + "tag_field" + ] }, { - "metadata": {}, - "source": "train_field = mdo.reserved_by_name[\"split\"][\"train\"]\ntrain_field", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "train_field = mdo.reserved_by_name[\"split\"][\"train\"]\n", + "train_field" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Construct metadata fields for existing metadata schemas\n", @@ -148,153 +174,283 @@ "\n", "\n", "\n" - ], - "cell_type": "markdown" + ] }, { + 
"cell_type": "markdown", "metadata": {}, "source": [ "Option 1: Specify metadata with a list of `DataRowMetadataField` objects. This is the recommended option since it comes with validation for metadata fields." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Construct a metadata field of string kind\ntag_metadata_field = lb.DataRowMetadataField(\n name=\"tag\",\n value=\"tag_string\",\n)\n\n# Construct an metadata field of datetime kind\ncapture_datetime_field = lb.DataRowMetadataField(\n name=\"captureDateTime\",\n value=datetime.utcnow(),\n)\n\n# Construct a metadata field of Enums options\nsplit_metadata_field = lb.DataRowMetadataField(\n name=\"split\",\n value=\"train\",\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Construct a metadata field of string kind\n", + "tag_metadata_field = lb.DataRowMetadataField(\n", + " name=\"tag\",\n", + " value=\"tag_string\",\n", + ")\n", + "\n", + "# Construct an metadata field of datetime kind\n", + "capture_datetime_field = lb.DataRowMetadataField(\n", + " name=\"captureDateTime\",\n", + " value=datetime.utcnow(),\n", + ")\n", + "\n", + "# Construct a metadata field of Enums options\n", + "split_metadata_field = lb.DataRowMetadataField(\n", + " name=\"split\",\n", + " value=\"train\",\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Option 2: You can also specify the metadata fields with dictionary format without declaring the `DataRowMetadataField` objects.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Construct a dictionary of string metadata\ntag_metadata_field_dict = {\n \"name\": \"tag\",\n \"value\": \"tag_string\",\n}\n\n# Construct a dictionary of datetime metadata\ncapture_datetime_field_dict = {\n \"name\": \"captureDateTime\",\n \"value\": datetime.utcnow(),\n}\n\n# Construct a dictionary of Enums options metadata\nsplit_metadata_field_dict = {\n 
\"name\": \"split\",\n \"value\": \"train\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Construct a dictionary of string metadata\n", + "tag_metadata_field_dict = {\n", + " \"name\": \"tag\",\n", + " \"value\": \"tag_string\",\n", + "}\n", + "\n", + "# Construct a dictionary of datetime metadata\n", + "capture_datetime_field_dict = {\n", + " \"name\": \"captureDateTime\",\n", + " \"value\": datetime.utcnow(),\n", + "}\n", + "\n", + "# Construct a dictionary of Enums options metadata\n", + "split_metadata_field_dict = {\n", + " \"name\": \"split\",\n", + " \"value\": \"train\",\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Create a custom metadata schema with their corresponding fields\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Final\ncustom_metadata_fields = []\n\n# Create the schema for the metadata\nnumber_schema = mdo.create_schema(name=\"numberMetadataCustom\",\n kind=DataRowMetadataKind.number)\n\n# Add fields to the metadata schema\ndata_row_metadata_fields_number = lb.DataRowMetadataField(\n name=number_schema.name, value=5.0)\n\ncustom_metadata_fields.append(data_row_metadata_fields_number)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Final\n", + "custom_metadata_fields = []\n", + "\n", + "# Create the schema for the metadata\n", + "number_schema = mdo.create_schema(name=\"numberMetadataCustom\",\n", + " kind=DataRowMetadataKind.number)\n", + "\n", + "# Add fields to the metadata schema\n", + "data_row_metadata_fields_number = lb.DataRowMetadataField(\n", + " name=number_schema.name, value=5.0)\n", + "\n", + "custom_metadata_fields.append(data_row_metadata_fields_number)" + ] }, { - "metadata": {}, - "source": "# Create the schema for an enum metadata\ncustom_metadata_fields = []\n\nenum_schema = mdo.create_schema(\n 
name=\"enumMetadata\",\n kind=DataRowMetadataKind.enum,\n options=[\"option1\", \"option2\"],\n)\n\n# Add fields to the metadata schema\ndata_row_metadata_fields_enum_1 = lb.DataRowMetadataField(name=enum_schema.name,\n value=\"option1\")\ncustom_metadata_fields.append(data_row_metadata_fields_enum_1)\n\ndata_row_metadata_fields_enum_2 = lb.DataRowMetadataField(name=enum_schema.name,\n value=\"option2\")\ncustom_metadata_fields.append(data_row_metadata_fields_enum_2)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create the schema for an enum metadata\n", + "custom_metadata_fields = []\n", + "\n", + "enum_schema = mdo.create_schema(\n", + " name=\"enumMetadata\",\n", + " kind=DataRowMetadataKind.enum,\n", + " options=[\"option1\", \"option2\"],\n", + ")\n", + "\n", + "# Add fields to the metadata schema\n", + "data_row_metadata_fields_enum_1 = lb.DataRowMetadataField(name=enum_schema.name,\n", + " value=\"option1\")\n", + "custom_metadata_fields.append(data_row_metadata_fields_enum_1)\n", + "\n", + "data_row_metadata_fields_enum_2 = lb.DataRowMetadataField(name=enum_schema.name,\n", + " value=\"option2\")\n", + "custom_metadata_fields.append(data_row_metadata_fields_enum_2)" + ] }, { - "metadata": {}, - "source": "# Inspect the newly created metadata schemas\nmetadata_ontologies = mdo.fields_by_id\npprint(metadata_ontologies, indent=2)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Inspect the newly created metadata schemas\n", + "metadata_ontologies = mdo.fields_by_id\n", + "pprint(metadata_ontologies, indent=2)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Create data rows with metadata\n", "\n", "See our [documentation](https://docs.labelbox.com/docs/limits) for information on limits for uploading data rows in a single API operation." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# A simple example of uploading data rows with metadata\ndataset = client.create_dataset(\n name=\"Simple Data Rows import with metadata example\")\nglobal_key = \"s_basic.jpg\"\ndata_row = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/basic.jpg\",\n \"global_key\":\n global_key,\n}\n# This line works with dictionaries as well as schemas and fields created with DataRowMetadataField\ndata_row[\"metadata_fields\"] = custom_metadata_fields + [\n split_metadata_field,\n capture_datetime_field_dict,\n tag_metadata_field,\n]\n\ntask = dataset.create_data_rows([data_row])\ntask.wait_till_done()\nresult_task = task.result\nprint(result_task)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# A simple example of uploading data rows with metadata\n", + "dataset = client.create_dataset(\n", + " name=\"Simple Data Rows import with metadata example\")\n", + "global_key = \"s_basic.jpg\" + str(uuid4())\n", + "data_row = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/basic.jpg\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "# This line works with dictionaries as well as schemas and fields created with DataRowMetadataField\n", + "data_row[\"metadata_fields\"] = custom_metadata_fields + [\n", + " split_metadata_field,\n", + " capture_datetime_field_dict,\n", + " tag_metadata_field,\n", + "]\n", + "\n", + "task = dataset.create_data_rows([data_row])\n", + "task.wait_till_done()\n", + "result_task = task.result\n", + "print(result_task)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Update data row metadata" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Get the data row that was uploaded in the previous cell\nnum_schema = mdo.get_by_name(\"numberMetadataCustom\")\n\n# Update the 
metadata\nupdated_metadata = lb.DataRowMetadataField(schema_id=num_schema.uid, value=10.2)\n\n# Create data row payload\ndata_row_payload = lb.DataRowMetadata(global_key=global_key,\n fields=[updated_metadata])\n\n# Upsert the fields with the update metadata for number-metadata\nmdo.bulk_upsert([data_row_payload])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Get the data row that was uploaded in the previous cell\n", + "num_schema = mdo.get_by_name(\"numberMetadataCustom\")\n", + "\n", + "# Update the metadata\n", + "updated_metadata = lb.DataRowMetadataField(schema_id=num_schema.uid, value=10.2)\n", + "\n", + "# Create data row payload\n", + "data_row_payload = lb.DataRowMetadata(global_key=global_key,\n", + " fields=[updated_metadata])\n", + "\n", + "# Upsert the fields with the update metadata for number-metadata\n", + "mdo.bulk_upsert([data_row_payload])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Update metadata schema" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# update a name\nnumber_schema = mdo.update_schema(name=\"numberMetadataCustom\",\n new_name=\"numberMetadataCustomNew\")\n\n# update an Enum metadata schema option's name, this only applies to Enum metadata schema.\nenum_schema = mdo.update_enum_option(name=\"enumMetadata\",\n option=\"option1\",\n new_option=\"option3\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# update a name\n", + "number_schema = mdo.update_schema(name=\"numberMetadataCustom\",\n", + " new_name=\"numberMetadataCustomNew\")\n", + "\n", + "# update an Enum metadata schema option's name, this only applies to Enum metadata schema.\n", + "enum_schema = mdo.update_enum_option(name=\"enumMetadata\",\n", + " option=\"option1\",\n", + " new_option=\"option3\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": 
[ "## Accessing metadata\n", "\n", "You can examine an individual data row, including its metadata." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "data_row = next(dataset.data_rows())\nfor metadata_field in data_row.metadata_fields:\n print(metadata_field[\"name\"], \":\", metadata_field[\"value\"])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "data_row = next(dataset.data_rows())\n", + "for metadata_field in data_row.metadata_fields:\n", + " print(metadata_field[\"name\"], \":\", metadata_field[\"value\"])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "You can bulk export metadata using data row IDs." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "data_rows_metadata = mdo.bulk_export([data_row.uid])\nlen(data_rows_metadata)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "data_rows_metadata = mdo.bulk_export([data_row.uid])\n", + "len(data_rows_metadata)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Delete custom metadata schema \n", "You can delete custom metadata schema by name. If you wish to delete a metadata schema, uncomment the line below and insert the desired name." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# status = mdo.delete_schema(name=\"\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# status = mdo.delete_schema(name=\"\")" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/model_experiments/custom_metrics_demo.ipynb b/examples/model_experiments/custom_metrics_demo.ipynb index 8e0412f40..7fff1a770 100644 --- a/examples/model_experiments/custom_metrics_demo.ipynb +++ b/examples/model_experiments/custom_metrics_demo.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "----\n", @@ -38,236 +36,1528 @@ " * Iterate faster\n", " * Measure and report on model quality\n", " * Understand marginal value of additional labels and modeling efforts\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Environment setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "import requests\n", + "import labelbox as lb\n", + 
"import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Radio (single-choice)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "radio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.1\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n )),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.1\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": 
[ + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "# NDJSON\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: checklist (multi-choice)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n 
{\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n lb_types.ClassificationAnswer(\n name=\"second_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n ]),\n)\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n {\n \"name\":\n \"second_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " 
name=\"first_checklist_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " ),\n", + " lb_types.ClassificationAnswer(\n", + " name=\"second_checklist_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " ),\n", + " ]),\n", + ")\n", + "checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"checklist_question\",\n", + " \"answer\": [\n", + " {\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + " {\n", + " \"name\":\n", + " 
\"second_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Nested radio and checklist" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "nested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n )),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\":\n 
\"nested_radio_question\",\n \"confidence\":\n 0.5,\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n }],\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": 
\"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"confidence\":\n 0.5,\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\":\n \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n },\n }],\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": 
\"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332,\n", + " },\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_radio_question\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " 
\"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + " }],\n", + "}\n", + "\n", + "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332,\n", 
+ " },\n", + " ],\n", + " )\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332,\n", + " },\n", + " ],\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding Box" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "bbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": 
\"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"bbox\": {\n \"top\": 977,\n \"left\": 1690,\n \"height\": 330,\n \"width\": 225\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "bbox_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", + " end=lb_types.Point(x=1915,\n", + " y=1307), # x= left + width , y = top + height\n", + " ),\n", + ")\n", + "\n", + "bbox_prediction_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"confidence\": 0.5,\n", + " 
\"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"bbox\": {\n", + " \"top\": 977,\n", + " \"left\": 1690,\n", + " \"height\": 330,\n", + " \"width\": 225\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box with nested classification " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n 
},\n ],\n )),\n )\n ],\n)\n## NDJSON\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n }],\n \"bbox\": {\n \"top\": 933,\n \"left\": 541,\n \"height\": 191,\n \"width\": 330\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.2\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.3\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 23\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", + " end=lb_types.Point(x=871, 
y=1124), # x= left + width , y = top + height\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.2\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.3\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 23\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332,\n", + " },\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + ")\n", + "## NDJSON\n", + "bbox_with_radio_subclass_prediction_ndjson = {\n", + " \"name\": \"bbox_with_radio_subclass\",\n", + " \"confidence\": 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.2\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.3\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 23\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.2\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 
0.3\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 23\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + " }],\n", + " \"bbox\": {\n", + " \"top\": 933,\n", + " \"left\": 541,\n", + " \"height\": 191,\n", + " \"width\": 330\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Polygon" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Anotation\npolygon_prediction = lb_types.ObjectAnnotation(\n name=\"polygon\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\npolygon_prediction_ndjson = {\n \"name\":\n \"polygon\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": 
\"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"polygon\": [\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n {\n \"x\": 2278.306,\n \"y\": 256.885\n },\n {\n \"x\": 2428.197,\n \"y\": 200.437\n },\n {\n \"x\": 2560.0,\n \"y\": 335.419\n },\n {\n \"x\": 2557.386,\n \"y\": 503.165\n },\n {\n \"x\": 2320.596,\n \"y\": 503.103\n },\n {\n \"x\": 2156.083,\n \"y\": 628.943\n },\n {\n \"x\": 2161.111,\n \"y\": 785.519\n },\n {\n \"x\": 2002.115,\n \"y\": 894.647\n },\n {\n \"x\": 1838.456,\n \"y\": 877.874\n },\n {\n \"x\": 1436.53,\n \"y\": 874.636\n },\n {\n \"x\": 1411.403,\n \"y\": 758.579\n },\n {\n \"x\": 1353.853,\n \"y\": 751.74\n },\n {\n \"x\": 1345.264,\n \"y\": 453.461\n },\n {\n \"x\": 1426.011,\n \"y\": 421.129\n },\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Anotation\n", + "polygon_prediction = lb_types.ObjectAnnotation(\n", + " name=\"polygon\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " value=lb_types.Polygon(points=[\n", + " lb_types.Point(x=1489.581, y=183.934),\n", + " lb_types.Point(x=2278.306, y=256.885),\n", + " lb_types.Point(x=2428.197, y=200.437),\n", + " lb_types.Point(x=2560.0, y=335.419),\n", + " lb_types.Point(x=2557.386, y=503.165),\n", + " lb_types.Point(x=2320.596, y=503.103),\n", + " lb_types.Point(x=2156.083, y=628.943),\n", + " lb_types.Point(x=2161.111, y=785.519),\n", + " 
lb_types.Point(x=2002.115, y=894.647),\n", + " lb_types.Point(x=1838.456, y=877.874),\n", + " lb_types.Point(x=1436.53, y=874.636),\n", + " lb_types.Point(x=1411.403, y=758.579),\n", + " lb_types.Point(x=1353.853, y=751.74),\n", + " lb_types.Point(x=1345.264, y=453.461),\n", + " lb_types.Point(x=1426.011, y=421.129),\n", + " ]),\n", + ")\n", + "\n", + "polygon_prediction_ndjson = {\n", + " \"name\":\n", + " \"polygon\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"polygon\": [\n", + " {\n", + " \"x\": 1489.581,\n", + " \"y\": 183.934\n", + " },\n", + " {\n", + " \"x\": 2278.306,\n", + " \"y\": 256.885\n", + " },\n", + " {\n", + " \"x\": 2428.197,\n", + " \"y\": 200.437\n", + " },\n", + " {\n", + " \"x\": 2560.0,\n", + " \"y\": 335.419\n", + " },\n", + " {\n", + " \"x\": 2557.386,\n", + " \"y\": 503.165\n", + " },\n", + " {\n", + " \"x\": 2320.596,\n", + " \"y\": 503.103\n", + " },\n", + " {\n", + " \"x\": 2156.083,\n", + " \"y\": 628.943\n", + " },\n", + " {\n", + " \"x\": 2161.111,\n", + " \"y\": 785.519\n", + " },\n", + " {\n", + " \"x\": 2002.115,\n", + " \"y\": 894.647\n", + " },\n", + " {\n", + " \"x\": 1838.456,\n", + " \"y\": 877.874\n", + " },\n", + " {\n", + " \"x\": 1436.53,\n", + " \"y\": 874.636\n", + " },\n", + " {\n", + " \"x\": 1411.403,\n", + " \"y\": 758.579\n", + " },\n", + " {\n", + " \"x\": 1353.853,\n", + " \"y\": 751.74\n", + " },\n", + " {\n", + " \"x\": 1345.264,\n", + " \"y\": 453.461\n", + " },\n", + " {\n", + " \"x\": 1426.011,\n", + 
" \"y\": 421.129\n", + " },\n", + " {\n", + " \"x\": 1489.581,\n", + " \"y\": 183.934\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Free-form text" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python annotation\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\",\n value=lb_types.Text(\n answer=\"sample text\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"confidence\": 0.5,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\",\n", + " value=lb_types.Text(\n", + " answer=\"sample text\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " 
},\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "text_annotation_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"confidence\": 0.5,\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Point" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Annotation\npoint_prediction = lb_types.ObjectAnnotation(\n name=\"point\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\npoint_prediction_ndjson = {\n \"name\": \"point\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [],\n \"point\": {\n \"x\": 1166.606,\n \"y\": 1441.768\n },\n}", "cell_type": "code", + 
"execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Annotation\n", + "point_prediction = lb_types.ObjectAnnotation(\n", + " name=\"point\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " value=lb_types.Point(x=1166.606, y=1441.768),\n", + ")\n", + "\n", + "point_prediction_ndjson = {\n", + " \"name\": \"point\",\n", + " \"confidence\": 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"classifications\": [],\n", + " \"point\": {\n", + " \"x\": 1166.606,\n", + " \"y\": 1441.768\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Polyline" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "polyline_prediction = lb_types.ObjectAnnotation(\n name=\"polyline\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n 
\"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)\n\npolyline_prediction_ndjson = {\n \"name\":\n \"polyline\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [],\n \"line\": [\n {\n \"x\": 2534.353,\n \"y\": 249.471\n },\n {\n \"x\": 2429.492,\n \"y\": 182.092\n },\n {\n \"x\": 2294.322,\n \"y\": 221.962\n },\n {\n \"x\": 2224.491,\n \"y\": 180.463\n },\n {\n \"x\": 2136.123,\n \"y\": 204.716\n },\n {\n \"x\": 1712.247,\n \"y\": 173.949\n },\n {\n \"x\": 1703.838,\n \"y\": 84.438\n },\n {\n \"x\": 1579.772,\n \"y\": 82.61\n },\n {\n \"x\": 1583.442,\n \"y\": 167.552\n },\n {\n \"x\": 1478.869,\n \"y\": 164.903\n },\n {\n \"x\": 1418.941,\n \"y\": 318.149\n },\n {\n 
\"x\": 1243.128,\n \"y\": 400.815\n },\n {\n \"x\": 1022.067,\n \"y\": 319.007\n },\n {\n \"x\": 892.367,\n \"y\": 379.216\n },\n {\n \"x\": 670.273,\n \"y\": 364.408\n },\n {\n \"x\": 613.114,\n \"y\": 288.16\n },\n {\n \"x\": 377.559,\n \"y\": 238.251\n },\n {\n \"x\": 368.087,\n \"y\": 185.064\n },\n {\n \"x\": 246.557,\n \"y\": 167.286\n },\n {\n \"x\": 236.648,\n \"y\": 285.61\n },\n {\n \"x\": 90.929,\n \"y\": 326.412\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "polyline_prediction = lb_types.ObjectAnnotation(\n", + " name=\"polyline\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " value=lb_types.Line(points=[\n", + " lb_types.Point(x=2534.353, y=249.471),\n", + " lb_types.Point(x=2429.492, y=182.092),\n", + " lb_types.Point(x=2294.322, y=221.962),\n", + " lb_types.Point(x=2224.491, y=180.463),\n", + " lb_types.Point(x=2136.123, y=204.716),\n", + " lb_types.Point(x=1712.247, y=173.949),\n", + " lb_types.Point(x=1703.838, y=84.438),\n", + " lb_types.Point(x=1579.772, y=82.61),\n", + " lb_types.Point(x=1583.442, y=167.552),\n", + " lb_types.Point(x=1478.869, y=164.903),\n", + " lb_types.Point(x=1418.941, y=318.149),\n", + " lb_types.Point(x=1243.128, y=400.815),\n", + " lb_types.Point(x=1022.067, y=319.007),\n", + " lb_types.Point(x=892.367, y=379.216),\n", + " lb_types.Point(x=670.273, y=364.408),\n", + " lb_types.Point(x=613.114, y=288.16),\n", + " lb_types.Point(x=377.559, 
y=238.251),\n", + " lb_types.Point(x=368.087, y=185.064),\n", + " lb_types.Point(x=246.557, y=167.286),\n", + " lb_types.Point(x=236.648, y=285.61),\n", + " lb_types.Point(x=90.929, y=326.412),\n", + " ]),\n", + ")\n", + "\n", + "polyline_prediction_ndjson = {\n", + " \"name\":\n", + " \"polyline\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"classifications\": [],\n", + " \"line\": [\n", + " {\n", + " \"x\": 2534.353,\n", + " \"y\": 249.471\n", + " },\n", + " {\n", + " \"x\": 2429.492,\n", + " \"y\": 182.092\n", + " },\n", + " {\n", + " \"x\": 2294.322,\n", + " \"y\": 221.962\n", + " },\n", + " {\n", + " \"x\": 2224.491,\n", + " \"y\": 180.463\n", + " },\n", + " {\n", + " \"x\": 2136.123,\n", + " \"y\": 204.716\n", + " },\n", + " {\n", + " \"x\": 1712.247,\n", + " \"y\": 173.949\n", + " },\n", + " {\n", + " \"x\": 1703.838,\n", + " \"y\": 84.438\n", + " },\n", + " {\n", + " \"x\": 1579.772,\n", + " \"y\": 82.61\n", + " },\n", + " {\n", + " \"x\": 1583.442,\n", + " \"y\": 167.552\n", + " },\n", + " {\n", + " \"x\": 1478.869,\n", + " \"y\": 164.903\n", + " },\n", + " {\n", + " \"x\": 1418.941,\n", + " \"y\": 318.149\n", + " },\n", + " {\n", + " \"x\": 1243.128,\n", + " \"y\": 400.815\n", + " },\n", + " {\n", + " \"x\": 1022.067,\n", + " \"y\": 319.007\n", + " },\n", + " {\n", + " \"x\": 892.367,\n", + " \"y\": 379.216\n", + " },\n", + " {\n", + " \"x\": 670.273,\n", + " \"y\": 364.408\n", + " },\n", + " {\n", + " \"x\": 613.114,\n", + " \"y\": 288.16\n", + " 
},\n", + " {\n", + " \"x\": 377.559,\n", + " \"y\": 238.251\n", + " },\n", + " {\n", + " \"x\": 368.087,\n", + " \"y\": 185.064\n", + " },\n", + " {\n", + " \"x\": 246.557,\n", + " \"y\": 167.286\n", + " },\n", + " {\n", + " \"x\": 236.648,\n", + " \"y\": 285.61\n", + " },\n", + " {\n", + " \"x\": 90.929,\n", + " \"y\": 326.412\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# send a sample image as batch to the project\nglobal_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\"\ntest_img_urls = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"Custom metrics demo\",\n iam_integration=None)\ntask = dataset.create_data_rows([test_img_urls])\n\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# send a sample image as batch to the project\n", + "global_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\" + str(uuid.uuid4())\n", + "test_img_urls = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"Custom metrics demo\",\n", + " iam_integration=None)\n", + "task = 
dataset.create_data_rows([test_img_urls])\n", + "\n", + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: {task.errors}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if (\"Duplicate global key\" in error[\"message\"] and\n", + " dataset.row_count == 0):\n", + " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: {dataset}\")\n", + " dataset.delete()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n 
name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of tools\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Image Prediction Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\"),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " 
lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " tools=[ # List of tools\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", + " ),\n", + " ],\n", + " ),\n", + " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n", + " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n", + " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Image Prediction Import Demo\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Image,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# create Model\nmodel = client.create_model(\n name=\"model_with_aggregated_custom_metrics\" + str(uuid.uuid4()),\n ontology_id=ontology.uid,\n)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# 
create Model\n", + "model = client.create_model(\n", + " name=\"model_with_aggregated_custom_metrics\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid,\n", + ")\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", @@ -275,150 +1565,376 @@ "Create the prediction payload using the snippets of code in ***Supported Predictions*** section.\n", "\n", "The resulting label_ndjson should have exactly the same content for predictions that are supported by both (with exception of the uuid strings that are generated)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Label for predictions\nlabel_prediction = []\nlabel_prediction.append(\n lb_types.Label(\n data=lb_types.ImageData(global_key=global_key),\n annotations=[\n radio_prediction,\n nested_radio_prediction,\n checklist_prediction,\n nested_checklist_prediction,\n bbox_prediction,\n bbox_with_radio_subclass_prediction,\n polyline_prediction,\n polygon_prediction,\n point_prediction,\n text_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Label for predictions\n", + "label_prediction = []\n", + "label_prediction.append(\n", + " lb_types.Label(\n", + " data=lb_types.ImageData(global_key=global_key),\n", + " annotations=[\n", + " radio_prediction,\n", + " nested_radio_prediction,\n", + " checklist_prediction,\n", + " 
nested_checklist_prediction,\n", + " bbox_prediction,\n", + " bbox_with_radio_subclass_prediction,\n", + " polyline_prediction,\n", + " polygon_prediction,\n", + " point_prediction,\n", + " text_annotation,\n", + " ],\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_prediction_ndjson = []\n\nfor annot in [\n radio_prediction_ndjson,\n checklist_prediction_ndjson,\n bbox_prediction_ndjson,\n bbox_with_radio_subclass_prediction_ndjson,\n polygon_prediction_ndjson,\n point_prediction_ndjson,\n polyline_prediction_ndjson,\n text_annotation_ndjson,\n nested_radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n]:\n annot.update({\"dataRow\": {\"globalKey\": global_key}})\n label_prediction_ndjson.append(annot)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_prediction_ndjson = []\n", + "\n", + "for annot in [\n", + " radio_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " bbox_prediction_ndjson,\n", + " bbox_with_radio_subclass_prediction_ndjson,\n", + " polygon_prediction_ndjson,\n", + " point_prediction_ndjson,\n", + " polyline_prediction_ndjson,\n", + " text_annotation_ndjson,\n", + " nested_radio_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson,\n", + "]:\n", + " annot.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_prediction_ndjson.append(annot)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. 
Upload the predictions payload to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for prediction uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", + " predictions=label_prediction,\n", + ")\n", + "\n", + "# Errors will appear for prediction uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to a model run\n", "To visualize both annotations and predictions in the model run we will create a project with ground truth annotations.\n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. 
Create a labelbox project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"image_prediction_many_kinds\",\n media_type=lb.MediaType.Image)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Labelbox project\n", + "project = client.create_project(name=\"image_prediction_many_kinds\",\n", + " media_type=lb.MediaType.Image)\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. Create a batch to send to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_predictions_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_predictions_demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########### Annotations ###########\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n)\n\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon\",\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n 
lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point\",\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\npolyline_annotation = lb_types.ObjectAnnotation(\n name=\"polyline\",\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########### Annotations ###########\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " 
name=\"second_radio_answer\")),\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", + " end=lb_types.Point(x=1915,\n", + " y=1307), # x= left + width , y = top + height\n", + " ),\n", + ")\n", + "\n", + "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", + " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", + " ),\n", + 
" classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\", confidence=0.5)),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "polygon_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polygon\",\n", + " value=lb_types.Polygon(points=[\n", + " lb_types.Point(x=1489.581, y=183.934),\n", + " lb_types.Point(x=2278.306, y=256.885),\n", + " lb_types.Point(x=2428.197, y=200.437),\n", + " lb_types.Point(x=2560.0, y=335.419),\n", + " lb_types.Point(x=2557.386, y=503.165),\n", + " lb_types.Point(x=2320.596, y=503.103),\n", + " lb_types.Point(x=2156.083, y=628.943),\n", + " lb_types.Point(x=2161.111, y=785.519),\n", + " lb_types.Point(x=2002.115, y=894.647),\n", + " lb_types.Point(x=1838.456, y=877.874),\n", + " lb_types.Point(x=1436.53, y=874.636),\n", + " lb_types.Point(x=1411.403, y=758.579),\n", + " lb_types.Point(x=1353.853, y=751.74),\n", + " lb_types.Point(x=1345.264, y=453.461),\n", + " lb_types.Point(x=1426.011, y=421.129),\n", + " ]),\n", + ")\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", + "\n", + "point_annotation = lb_types.ObjectAnnotation(\n", + " name=\"point\",\n", + " value=lb_types.Point(x=1166.606, y=1441.768),\n", + ")\n", + "\n", + "polyline_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polyline\",\n", + " value=lb_types.Line(points=[\n", + " lb_types.Point(x=2534.353, y=249.471),\n", + " lb_types.Point(x=2429.492, y=182.092),\n", + " lb_types.Point(x=2294.322, y=221.962),\n", + " lb_types.Point(x=2224.491, y=180.463),\n", + " lb_types.Point(x=2136.123, y=204.716),\n", + " lb_types.Point(x=1712.247, y=173.949),\n", + " lb_types.Point(x=1703.838, y=84.438),\n", + " lb_types.Point(x=1579.772, y=82.61),\n", + " lb_types.Point(x=1583.442, y=167.552),\n", + " lb_types.Point(x=1478.869, y=164.903),\n", + " 
lb_types.Point(x=1418.941, y=318.149),\n", + " lb_types.Point(x=1243.128, y=400.815),\n", + " lb_types.Point(x=1022.067, y=319.007),\n", + " lb_types.Point(x=892.367, y=379.216),\n", + " lb_types.Point(x=670.273, y=364.408),\n", + " lb_types.Point(x=613.114, y=288.16),\n", + " lb_types.Point(x=377.559, y=238.251),\n", + " lb_types.Point(x=368.087, y=185.064),\n", + " lb_types.Point(x=246.557, y=167.286),\n", + " lb_types.Point(x=236.648, y=285.61),\n", + " lb_types.Point(x=90.929, y=326.412),\n", + " ]),\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. Create the label object" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\nlabel = []\nannotations = [\n radio_annotation,\n nested_radio_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n polygon_annotation,\n point_annotation,\n polyline_annotation,\n]\nlabel.append(\n lb_types.Label(data=lb_types.ImageData(global_key=global_key),\n annotations=annotations))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", + "label = []\n", + "annotations = [\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " text_annotation,\n", + " bbox_annotation,\n", + " bbox_with_radio_subclass_annotation,\n", + " polygon_annotation,\n", + " point_annotation,\n", + " polyline_annotation,\n", + "]\n", + "label.append(\n", + " lb_types.Label(data=lb_types.ImageData(global_key=global_key),\n", + " annotations=annotations))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. 
Upload annotations to the project using Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"annotation_import_\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"annotation_import_\" + str(uuid.uuid4()),\n", + " labels=label,\n", + ")\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6 Send the annotations to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_labels(project_id=project.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_labels(project_id=project.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + 
"nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/model_experiments/model_predictions_to_project.ipynb b/examples/model_experiments/model_predictions_to_project.ipynb index a1560feec..fd52ff5f8 100644 --- a/examples/model_experiments/model_predictions_to_project.ipynb +++ b/examples/model_experiments/model_predictions_to_project.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 2, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,47 +22,60 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Import Model Run Predictions to a Project\n", "Throughout the process of training your machine learning (ML) model, you may want to export your model-run predictions and import them to your new project. In this notebook, we will demonstrate the process on how to get those predictions moved over." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport labelbox.types as lb_types\nfrom labelbox.schema.conflict_resolution_strategy import (\n ConflictResolutionStrategy,)\nimport uuid", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "from labelbox.schema.conflict_resolution_strategy import (\n", + " ConflictResolutionStrategy,)\n", + "import uuid" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## API Key and Client\n", "See the developer guide for [creating an API key](https://docs.labelbox.com/reference/create-api-key)." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your API key\nAPI_KEY = \"\"\n# To get your API key go to: Workspace settings -> API -> Create API Key\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your API key\n", + "API_KEY = \"\"\n", + "# To get your API key go to: Workspace settings -> API -> Create API Key\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Creating Model Experiment\n", @@ -72,148 +83,245 @@ "In order to interact with Model Run predictions, you must create a Model Experiment with a Model Run and then add predictions. The steps below go over this process. See [Model](https://docs.labelbox.com/reference/model) from our developer guides for more information.\n", "\n", "To create a Model Experiment you will need to create an ontology. 
See [Ontology](https://docs.labelbox.com/reference/ontology) for more information" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Ontology" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "In this example we are making a simple ontology with a classification feature. The classification feature has two options: option 1 and option 2." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "classification_features = [\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"Demo Feature\",\n options=[lb.Option(value=\"option 1\"),\n lb.Option(value=\"option 2\")],\n )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n classifications=classification_features)\n\nontology = client.create_ontology(\"Demo Ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "classification_features = [\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"Demo Feature\",\n", + " options=[lb.Option(value=\"option 1\"),\n", + " lb.Option(value=\"option 2\")],\n", + " )\n", + "]\n", + "\n", + "ontology_builder = lb.OntologyBuilder(tools=[],\n", + " classifications=classification_features)\n", + "\n", + "ontology = client.create_ontology(\"Demo Ontology\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Image)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Experiment" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model = client.create_model(name=f\"Model Experiment Demo {str(uuid.uuid4())}\",\n ontology_id=ontology.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model = client.create_model(name=f\"Model Experiment 
Demo {str(uuid.uuid4())}\",\n", + " ontology_id=ontology.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Creating a Model Run from Model Experiment\n", "\n", "On this step we will need to create a dataset to attach data rows to our model run. See [Dataset](https://docs.labelbox.com/reference/dataset) for more information." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Dataset and Data Rows" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# send a sample image as data row for a dataset\nglobal_key = \"2560px-Kitano_Street_Kobe01s5s4110\"\n\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"foundry-demo-dataset\")\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\n\nprint(f\"Errors: {task.errors}\")\nprint(f\"Failed data rows: {task.failed_data_rows}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# send a sample image as data row for a dataset\n", + "global_key = \"2560px-Kitano_Street_Kobe01s5s4110\" + str(uuid.uuid4())\n", + "\n", + "test_img_url = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"foundry-demo-dataset\")\n", + "task = dataset.create_data_rows([test_img_url])\n", + 
"task.wait_till_done()\n", + "\n", + "print(f\"Errors: {task.errors}\")\n", + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if (\"Duplicate global key\" in error[\"message\"] and\n", + " dataset.row_count == 0):\n", + " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: {dataset}\")\n", + " dataset.delete()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Create Model Run and Attach Data Rows" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run_name = f\"Model Run Demo {str(uuid.uuid4())}\"\n\nmodel_run = model.create_model_run(name=model_run_name)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run_name = f\"Model Run Demo {str(uuid.uuid4())}\"\n", + "\n", + "model_run = model.create_model_run(name=model_run_name)" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Add Predictions\n", "In the below code snippet we are adding a sample predictions and attaching them to our data row inside our model run." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"Demo Feature\",\n value=lb_types.Checklist(\n answer=[lb_types.ClassificationAnswer(name=\"option 1\", confidence=0.5)\n ]),\n)\n\n# Create prediction label\nlabel_prediction = [\n lb_types.Label(\n data=lb_types.ImageData(global_key=global_key),\n annotations=[checklist_prediction],\n )\n]\n\n# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for prediction uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"Demo Feature\",\n", + " value=lb_types.Checklist(\n", + " answer=[lb_types.ClassificationAnswer(name=\"option 1\", confidence=0.5)\n", + " ]),\n", + ")\n", + "\n", + "# Create prediction label\n", + "label_prediction = [\n", + " lb_types.Label(\n", + " data=lb_types.ImageData(global_key=global_key),\n", + " annotations=[checklist_prediction],\n", + " )\n", + "]\n", + "\n", + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", + " predictions=label_prediction,\n", + ")\n", + "\n", + "# Errors will appear for prediction uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)\n", + "print(\"Status of uploads: \", upload_job_prediction.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Setup Project and Add Predictions\n", "In the steps below we will be creating our target project and setting up the project with the ontology we 
used with our model run. See [Project](https://docs.labelbox.com/reference/dataset) for more information." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a new project\nproject = client.create_project(name=\"Model Run Import Demo Project\",\n media_type=lb.MediaType.Image)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a new project\n", + "project = client.create_project(name=\"Model Run Import Demo Project\",\n", + " media_type=lb.MediaType.Image)" + ] }, { - "metadata": {}, - "source": "# Setup Ontology\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Setup Ontology\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Ontology Mapping\n", "To send prediction to your annotate project you will need to provide a ontology mapping python dictionary item. This matches ontology feature id to another. You would use this if your ontology was different from your model run to your project. In our case, since we are using the same ontology, you would just need to map the same feature id to each other." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Get ontology dictionary to obtain featureSchemaIds\nontology_normalized = ontology.normalized\n\nPREDICTIONS_ONTOLOGY_MAPPING = {\n ontology_normalized[\"classifications\"][0][\"featureSchemaId\"]:\n ontology_normalized[\"classifications\"][0]\n [\"featureSchemaId\"], # Classification featureSchemaID\n ontology_normalized[\"classifications\"][0][\"options\"][0][\"featureSchemaId\"]:\n ontology_normalized[\"classifications\"][0][\"options\"][0]\n [\"featureSchemaId\"], # Different Classification Answer featureSchemaIDs\n ontology_normalized[\"classifications\"][0][\"options\"][1][\"featureSchemaId\"]:\n ontology_normalized[\"classifications\"][0][\"options\"][1]\n [\"featureSchemaId\"],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Get ontology dictionary to obtain featureSchemaIds\n", + "ontology_normalized = ontology.normalized\n", + "\n", + "PREDICTIONS_ONTOLOGY_MAPPING = {\n", + " ontology_normalized[\"classifications\"][0][\"featureSchemaId\"]:\n", + " ontology_normalized[\"classifications\"][0]\n", + " [\"featureSchemaId\"], # Classification featureSchemaID\n", + " ontology_normalized[\"classifications\"][0][\"options\"][0][\"featureSchemaId\"]:\n", + " ontology_normalized[\"classifications\"][0][\"options\"][0]\n", + " [\"featureSchemaId\"], # Different Classification Answer featureSchemaIDs\n", + " ontology_normalized[\"classifications\"][0][\"options\"][1][\"featureSchemaId\"]:\n", + " ontology_normalized[\"classifications\"][0][\"options\"][1]\n", + " [\"featureSchemaId\"],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Send Model Predictions from Model Run to Annotate\n", @@ -235,36 +343,79 @@ " * ConflictResolutionStrategy.OverrideWithAnnotations\n", "* `param batch_priority`\n", " - The priority of the batch.\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": 
"markdown", "metadata": {}, "source": [ "#### Import Predictions as pre-labels" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "send_to_annotations_params = {\n \"predictions_ontology_mapping\":\n PREDICTIONS_ONTOLOGY_MAPPING,\n \"exclude_data_rows_in_project\":\n False,\n \"override_existing_annotations_rule\":\n ConflictResolutionStrategy.OverrideWithPredictions,\n \"batch_priority\":\n 5,\n}\n\n# Send the predictions as pre-labels\nqueue_id = [\n queue.uid\n for queue in project.task_queues()\n if queue.queue_type == \"INITIAL_LABELING_QUEUE\"\n][0]\n\ntask = model_run.send_to_annotate_from_model(\n destination_project_id=project.uid,\n task_queue_id=\n queue_id, # ID of workflow task, set ID to None if you want to convert pre-labels to ground truths .\n batch_name=\"Prediction Import Demo Batch\",\n data_rows=lb.GlobalKeys(\n [global_key] # Provide a list of global keys from foundry app task\n ),\n params=send_to_annotations_params,\n)\n\ntask.wait_till_done()\n\nprint(f\"Errors: {task.errors}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "send_to_annotations_params = {\n", + " \"predictions_ontology_mapping\":\n", + " PREDICTIONS_ONTOLOGY_MAPPING,\n", + " \"exclude_data_rows_in_project\":\n", + " False,\n", + " \"override_existing_annotations_rule\":\n", + " ConflictResolutionStrategy.OverrideWithPredictions,\n", + " \"batch_priority\":\n", + " 5,\n", + "}\n", + "\n", + "# Send the predictions as pre-labels\n", + "queue_id = [\n", + " queue.uid\n", + " for queue in project.task_queues()\n", + " if queue.queue_type == \"INITIAL_LABELING_QUEUE\"\n", + "][0]\n", + "\n", + "task = model_run.send_to_annotate_from_model(\n", + " destination_project_id=project.uid,\n", + " task_queue_id=\n", + " queue_id, # ID of workflow task, set ID to None if you want to convert pre-labels to ground truths .\n", + " batch_name=\"Prediction Import Demo Batch\",\n", + " 
data_rows=lb.GlobalKeys(\n", + " [global_key] # Provide a list of global keys from foundry app task\n", + " ),\n", + " params=send_to_annotations_params,\n", + ")\n", + "\n", + "task.wait_till_done()\n", + "\n", + "print(f\"Errors: {task.errors}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Cleanup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()\n# model_run.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()\n", + "# model_run.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/prediction_upload/conversational_LLM_predictions.ipynb b/examples/prediction_upload/conversational_LLM_predictions.ipynb index be9878401..878a16649 100644 --- a/examples/prediction_upload/conversational_LLM_predictions.ipynb +++ b/examples/prediction_upload/conversational_LLM_predictions.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,138 +22,295 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# LLM pairwise comparison with Conversational text using Model\n", "\n", "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis in the model product.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + 
"%pip install -q \"labelbox[data]\"" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Set up" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "import uuid" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API Key" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Supported annotations for conversational text" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Entity" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ner_prediction = lb_types.ObjectAnnotation(\n name=\"ner\",\n confidence=0.5,\n value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n)\n\nner_prediction_ndjson = {\n \"name\": \"ner\",\n \"confidence\": 0.5,\n \"location\": {\n \"start\": 0,\n \"end\": 8\n },\n \"messageId\": \"message-1\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ner_prediction = lb_types.ObjectAnnotation(\n", + " name=\"ner\",\n", + " confidence=0.5,\n", + " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n", + ")\n", + "\n", + "ner_prediction_ndjson = {\n", + " \"name\": \"ner\",\n", + " \"confidence\": 0.5,\n", + " \"location\": {\n", + " \"start\": 0,\n", + " \"end\": 8\n", + " },\n", + " \"messageId\": 
\"message-1\",\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Radio (single-choice)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "radio_prediction = lb_types.ClassificationAnnotation(\n name=\"Choose the best response\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(name=\"Response B\",\n confidence=0.5)),\n)\n\nradio_prediction_ndjson = {\n \"name\": \"Choose the best response\",\n \"answer\": {\n \"name\": \"Response B\",\n \"confidence\": 0.5\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"Choose the best response\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(name=\"Response B\",\n", + " confidence=0.5)),\n", + ")\n", + "\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"Choose the best response\",\n", + " \"answer\": {\n", + " \"name\": \"Response B\",\n", + " \"confidence\": 0.5\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Free-form text" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "text_prediction = lb_types.ClassificationAnnotation(\n name=\"Provide a reason for your choice\",\n value=lb_types.Text(answer=\"the answer to the text questions right here\",\n confidence=0.5),\n)\n\ntext_prediction_ndjson = {\n \"name\": \"Provide a reason for your choice\",\n \"answer\": \"This is the more concise answer\",\n \"confidence\": 0.5,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "text_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"Provide a reason for your choice\",\n", + " value=lb_types.Text(answer=\"the answer to the text questions right here\",\n", + " confidence=0.5),\n", + ")\n", + "\n", + "text_prediction_ndjson 
= {\n", + " \"name\": \"Provide a reason for your choice\",\n", + " \"answer\": \"This is the more concise answer\",\n", + " \"confidence\": 0.5,\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Checklist (multi-choice)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n message_id=\"message-1\", # Message specific annotation\n)\n\nchecklist_prediction_ndjson = {\n \"name\": \"checklist_convo\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n \"messageId\": \"message-1\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_convo\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5),\n", + " ]),\n", + " message_id=\"message-1\", # Message specific annotation\n", + ")\n", + "\n", + "checklist_prediction_ndjson = {\n", + " \"name\": \"checklist_convo\",\n", + " \"answers\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " ],\n", + " \"messageId\": \"message-1\",\n", + "}" + ] + }, + { + "cell_type": "markdown", 
"metadata": {}, "source": [ "### Classification: Nested radio and checklist" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Message based\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"message-1\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )\n ]),\n )\n ],\n )\n ]),\n)\n# Message based\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"messageId\":\n \"message-1\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n },\n }],\n }],\n}\n# Global\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )),\n )\n ],\n )),\n)\n# Global\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n 
\"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# Message based\n", + "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " message_id=\"message-1\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=\n", + " 0.5, # Confidence scores should be added to the answer\n", + " )\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "# Message based\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"messageId\":\n", + " \"message-1\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " },\n", + " }],\n", + " }],\n", + "}\n", + "# Global\n", + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " 
lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=\n", + " 0.5, # Confidence scores should be added to the answer\n", + " )),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "# Global\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " }],\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows with \"modelOutputs\" into Catalog\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n", @@ -169,218 +324,503 @@ " }\n", "]\n", "```\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Example of row_data with model outputs" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "pairwise_shopping_2 = \"\"\"\n {\n \"type\": \"application/vnd.labelbox.conversational\",\n \"version\": 1,\n \"messages\": [\n {\n \"messageId\": \"message-0\",\n \"timestampUsec\": 1530718491,\n \"content\": \"Hi! 
How can I help?\",\n \"user\": {\n \"userId\": \"Bot 002\",\n \"name\": \"Bot\"\n },\n \"align\": \"left\",\n \"canLabel\": false\n },\n {\n \"messageId\": \"message-1\",\n \"timestampUsec\": 1530718503,\n \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n \"user\": {\n \"userId\": \"User 00686\",\n \"name\": \"User\"\n },\n \"align\": \"right\",\n \"canLabel\": true\n }\n\n ],\n \"modelOutputs\": [\n {\n \"title\": \"Response A\",\n \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n },\n {\n \"title\": \"Response B\",\n \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. 
Your satisfaction is important to us!\",\n \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n }\n ]\n}\n\"\"\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "pairwise_shopping_2 = \"\"\"\n", + " {\n", + " \"type\": \"application/vnd.labelbox.conversational\",\n", + " \"version\": 1,\n", + " \"messages\": [\n", + " {\n", + " \"messageId\": \"message-0\",\n", + " \"timestampUsec\": 1530718491,\n", + " \"content\": \"Hi! How can I help?\",\n", + " \"user\": {\n", + " \"userId\": \"Bot 002\",\n", + " \"name\": \"Bot\"\n", + " },\n", + " \"align\": \"left\",\n", + " \"canLabel\": false\n", + " },\n", + " {\n", + " \"messageId\": \"message-1\",\n", + " \"timestampUsec\": 1530718503,\n", + " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n", + " \"user\": {\n", + " \"userId\": \"User 00686\",\n", + " \"name\": \"User\"\n", + " },\n", + " \"align\": \"right\",\n", + " \"canLabel\": true\n", + " }\n", + "\n", + " ],\n", + " \"modelOutputs\": [\n", + " {\n", + " \"title\": \"Response A\",\n", + " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. 
We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n", + " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n", + " },\n", + " {\n", + " \"title\": \"Response B\",\n", + " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n", + " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n", + " }\n", + " ]\n", + "}\n", + "\"\"\"" + ] }, { - "metadata": {}, - "source": "global_key = \"pairwise_shooping_asset\"\nconvo_data = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n \"global_key\":\n global_key,\n}\n# Create a dataset\ndataset = client.create_dataset(name=\"pairwise_prediction_demo\")\n# Create a datarows\ntask = dataset.create_data_rows([convo_data])\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "global_key = \"pairwise_shooping_asset\" + str(uuid.uuid4())\n", + "convo_data = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "# Create a dataset\n", + "dataset = client.create_dataset(name=\"pairwise_prediction_demo\")\n", + "# Create a datarows\n", + "task = dataset.create_data_rows([convo_data])\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows:\", task.failed_data_rows)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: 
Create/select an Ontology for your model predictions" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create an ontology with relevant classifications\n\nontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n scope=lb.Classification.Scope.GLOBAL,\n name=\"Choose the best response\",\n options=[\n lb.Option(value=\"Response A\"),\n lb.Option(value=\"Response B\"),\n lb.Option(value=\"Tie\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"Provide a reason for your choice\",\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n scope=lb.Classification.Scope.INDEX,\n name=\"checklist_convo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Pairwise comparison ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Conversational,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create an ontology with 
relevant classifications\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " tools=[\n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n", + " ],\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " name=\"Choose the best response\",\n", + " options=[\n", + " lb.Option(value=\"Response A\"),\n", + " lb.Option(value=\"Response B\"),\n", + " lb.Option(value=\"Tie\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"Provide a reason for your choice\",\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " scope=lb.Classification.Scope.INDEX,\n", + " name=\"checklist_convo\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope=lb.Classification.Scope.INDEX,\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " 
\"Pairwise comparison ontology\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Conversational,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# create model\nmodel = client.create_model(name=\"Comparison_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create model run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# create model\n", + "model = client.create_model(name=\"Comparison_model_run_\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid)\n", + "# create model run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Step 5: Create the predictions payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_prediction = []\nlabel_prediction.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n ner_prediction,\n text_prediction,\n checklist_prediction,\n radio_prediction,\n nested_radio_prediction,\n nested_checklist_prediction,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label_prediction = []\n", + "label_prediction.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " 
ner_prediction,\n", + " text_prediction,\n", + " checklist_prediction,\n", + " radio_prediction,\n", + " nested_radio_prediction,\n", + " nested_checklist_prediction,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "Setup the payload with the annotations that were created in Step 1." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\nfor annotations in [\n ner_prediction_ndjson,\n text_prediction_ndjson,\n checklist_prediction_ndjson,\n radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n nested_radio_prediction_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label_ndjson = []\n", + "for annotations in [\n", + " ner_prediction_ndjson,\n", + " text_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " radio_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson,\n", + " nested_radio_prediction_ndjson,\n", + "]:\n", + " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_ndjson.append(annotations)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6: Upload the predictions payload to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload the prediction label to the Model Run\n", + 
"upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", + " predictions=label_prediction,\n", + ")\n", + "\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)\n", + "print(\"Status of uploads: \", upload_job_prediction.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.1 Create a labelbox project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project = client.create_project(\n name=\"Conversational Human Evaluation Demo\",\n media_type=lb.MediaType.Conversational,\n)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project = client.create_project(\n", + " name=\"Conversational Human Evaluation Demo\",\n", + " media_type=lb.MediaType.Conversational,\n", + ")\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.2 Create a batch to send to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_convo_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_convo_prediction_demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority 
between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ner_annotation = lb_types.ObjectAnnotation(\n name=\"ner\",\n value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n)\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"Choose the best response\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"Response B\")),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"Provide a reason for your choice\",\n value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n message_id=\"message-1\", # Message specific annotation\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"message-1\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)", "cell_type": "code", + "execution_count": null, 
+ "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "ner_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner\",\n", + " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n", + ")\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"Choose the best response\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"Response B\")),\n", + ")\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"Provide a reason for your choice\",\n", + " value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n", + ")\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_convo\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + " message_id=\"message-1\", # Message specific annotation\n", + ")\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " message_id=\"message-1\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " 
name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "7.4 Create the label object" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_annotation = []\nlabel_annotation.append(\n lb_types.Label(\n data=lb_types.ConversationData(global_key=global_key),\n annotations=[\n ner_annotation,\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_radio_annotation,\n nested_checklist_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label_annotation = []\n", + "label_annotation.append(\n", + " lb_types.Label(\n", + " data=lb_types.ConversationData(global_key=global_key),\n", + " annotations=[\n", + " ner_annotation,\n", + " text_annotation,\n", + " checklist_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " nested_checklist_annotation,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "7.5 Upload annotations to the project using Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label_annotation,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " 
name=\"label_import_job\" + str(uuid.uuid4()),\n", + " labels=label_annotation,\n", + ")\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.6 Send the annotations to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# get the labels id from the project\n", + "model_run.upsert_labels(project_id=project.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Option deletions for cleanup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/prediction_upload/conversational_predictions.ipynb b/examples/prediction_upload/conversational_predictions.ipynb index b6a649d56..0cf5ede85 100644 --- a/examples/prediction_upload/conversational_predictions.ipynb +++ b/examples/prediction_upload/conversational_predictions.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 2, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] 
}, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Conversational Text Prediction Import\n", @@ -50,151 +48,434 @@ "* Relationships\n", "\n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport uuid\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import uuid\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API key" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########### Radio Classification ###########\n\n# Python annotation\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\", confidence=0.5)),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"second_radio_answer\",\n \"confidence\": 0.5\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + 
"source": [ + "########### Radio Classification ###########\n", + "\n", + "# Python annotation\n", + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\", confidence=0.5)),\n", + ")\n", + "\n", + "# NDJSON\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"second_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "# message based classifications\nner_prediction = lb_types.ObjectAnnotation(\n name=\"ner\",\n confidence=0.5,\n value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n)\n\nner_prediction_ndjson = {\n \"name\": \"ner\",\n \"confidence\": 0.5,\n \"location\": {\n \"start\": 0,\n \"end\": 8\n },\n \"messageId\": \"4\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# message based classifications\n", + "ner_prediction = lb_types.ObjectAnnotation(\n", + " name=\"ner\",\n", + " confidence=0.5,\n", + " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n", + ")\n", + "\n", + "ner_prediction_ndjson = {\n", + " \"name\": \"ner\",\n", + " \"confidence\": 0.5,\n", + " \"location\": {\n", + " \"start\": 0,\n", + " \"end\": 8\n", + " },\n", + " \"messageId\": \"4\",\n", + "}" + ] }, { - "metadata": {}, - "source": "##### Classification free text #####\n# Confidence scores are not supported for text predictions\n\ntext_prediction = lb_types.ClassificationAnnotation(\n name=\"text_convo\",\n value=lb_types.Text(\n answer=\"the answer to the text questions are right here\"),\n message_id=\"0\",\n)\n\ntext_prediction_ndjson = {\n \"name\": \"text_convo\",\n \"answer\": \"the answer to the text questions are right here\",\n \"messageId\": \"0\",\n}", "cell_type": "code", + "execution_count": null, + 
"metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "##### Classification free text #####\n", + "# Confidence scores are not supported for text predictions\n", + "\n", + "text_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"text_convo\",\n", + " value=lb_types.Text(\n", + " answer=\"the answer to the text questions are right here\"),\n", + " message_id=\"0\",\n", + ")\n", + "\n", + "text_prediction_ndjson = {\n", + " \"name\": \"text_convo\",\n", + " \"answer\": \"the answer to the text questions are right here\",\n", + " \"messageId\": \"0\",\n", + "}" + ] }, { - "metadata": {}, - "source": "##### Checklist Classification #######\n\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n message_id=\"2\",\n)\n\nchecklist_prediction_ndjson = {\n \"name\": \"checklist_convo\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n \"messageId\": \"2\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "##### Checklist Classification #######\n", + "\n", + "checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_convo\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5),\n", + " ]),\n", + " message_id=\"2\",\n", + ")\n", + "\n", + "checklist_prediction_ndjson = {\n", + " \"name\": \"checklist_convo\",\n", + " 
\"answers\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " ],\n", + " \"messageId\": \"2\",\n", + "}" + ] }, { - "metadata": {}, - "source": "######## Radio Classification ######\n\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_convo\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n message_id=\"0\",\n)\n\nradio_prediction_ndjson = {\n \"name\": \"radio_convo\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n \"messageId\": \"0\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######## Radio Classification ######\n", + "\n", + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"radio_convo\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\", confidence=0.5)),\n", + " message_id=\"0\",\n", + ")\n", + "\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"radio_convo\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " \"messageId\": \"0\",\n", + "}" + ] }, { - "metadata": {}, - "source": "# ############ global nested classifications ###########\n\n# Message based\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"10\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=\n 0.5, # 
Confidence scores should be added to the answer\n )\n ]),\n )\n ],\n )\n ]),\n)\n# Message based\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"messageId\":\n \"10\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n },\n }],\n }],\n}\n# Global\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )),\n )\n ],\n )),\n)\n# Global\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# ############ global nested classifications ###########\n", + "\n", + "# Message based\n", + "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " message_id=\"10\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + 
" classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=\n", + " 0.5, # Confidence scores should be added to the answer\n", + " )\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "# Message based\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"messageId\":\n", + " \"10\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " },\n", + " }],\n", + " }],\n", + "}\n", + "# Global\n", + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=\n", + " 0.5, # Confidence scores should be added to the answer\n", + " )),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "# Global\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": 
\"first_sub_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " }],\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create one Labelbox dataset\n\nglobal_key = \"conversation-1.json\"\n\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(\n name=\"conversational_annotation_import_demo_dataset\")\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create one Labelbox dataset\n", + "\n", + "global_key = \"conversation-1.json\" + str(uuid.uuid4())\n", + "\n", + "asset = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(\n", + " name=\"conversational_annotation_import_demo_dataset\")\n", + "task = dataset.create_data_rows([asset])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows: \", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(\n tools=[lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\")],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n scope=lb.Classification.Scope.INDEX,\n name=\"text_convo\",\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n scope=lb.Classification.Scope.INDEX,\n name=\"checklist_convo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_convo\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\"Ontology Conversation Annotations\",\n ontology_builder.asdict())", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " tools=[lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\")],\n", + " classifications=[\n", + " lb.Classification(\n", + " 
class_type=lb.Classification.Type.TEXT,\n", + " scope=lb.Classification.Scope.INDEX,\n", + " name=\"text_convo\",\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " scope=lb.Classification.Scope.INDEX,\n", + " name=\"checklist_convo\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_convo\",\n", + " scope=lb.Classification.Scope.INDEX,\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope=lb.Classification.Scope.INDEX,\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\"Ontology Conversation Annotations\",\n", + " ontology_builder.asdict())" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Mode and Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - 
"source": "# create Model\nmodel = client.create_model(\n name=\"Conversational_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid,\n)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# create Model\n", + "model = client.create_model(\n", + " name=\"Conversational_model_run_\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid,\n", + ")\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the predictions payload\n", @@ -203,155 +484,304 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. 
Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting payload should have exactly the same content for annotations that are supported by both" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Python annotations" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_prediction = []\nlabel_prediction.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n ner_prediction,\n checklist_prediction,\n text_prediction,\n radio_prediction,\n nested_checklist_prediction,\n nested_radio_prediction,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label_prediction = []\n", + "label_prediction.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " ner_prediction,\n", + " checklist_prediction,\n", + " text_prediction,\n", + " radio_prediction,\n", + " nested_checklist_prediction,\n", + " nested_radio_prediction,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "if using NDJSON : " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_prediction_ndjson = []\nfor annotations in [\n ner_prediction_ndjson,\n text_prediction_ndjson,\n checklist_prediction_ndjson,\n radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n nested_radio_prediction_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_prediction_ndjson.append(annotations)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label_prediction_ndjson = []\n", + "for annotations in [\n", + " ner_prediction_ndjson,\n", + " text_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " radio_prediction_ndjson,\n", + " 
nested_checklist_prediction_ndjson,\n", + " nested_radio_prediction_ndjson,\n", + "]:\n", + " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_prediction_ndjson.append(annotations)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6: Upload the predictions payload to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", + " predictions=label_prediction,\n", + ")\n", + "\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)\n", + "print(\"Status of uploads: \", upload_job_prediction.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7 : Send annotations to the Model Run " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.1 Create a labelbox project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project = client.create_project(\n name=\"Conversational Text Prediction Import Demo\",\n media_type=lb.MediaType.Conversational,\n)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project = client.create_project(\n", + " name=\"Conversational Text Prediction Import 
Demo\",\n", + " media_type=lb.MediaType.Conversational,\n", + ")\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.2 Create a batch to send to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_convo_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_convo_prediction_demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ner_annotation = lb_types.ObjectAnnotation(\n name=\"ner\",\n value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"text_convo\",\n value=lb_types.Text(\n answer=\"the answer to the text questions are right here\"),\n message_id=\"0\",\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n message_id=\"2\",\n)\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_convo\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n 
name=\"first_radio_answer\")),\n message_id=\"0\",\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"10\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "ner_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner\",\n", + " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n", + ")\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"text_convo\",\n", + " value=lb_types.Text(\n", + " answer=\"the answer to the text questions are right here\"),\n", + " message_id=\"0\",\n", + ")\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_convo\", # must match your ontology feature\"s name\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + " message_id=\"2\",\n", + ")\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_convo\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " 
name=\"first_radio_answer\")),\n", + " message_id=\"0\",\n", + ")\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " message_id=\"10\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "7.4 Create the label object" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label = []\nlabel.append(\n lb_types.Label(\n data=lb_types.ConversationData(global_key=global_key),\n annotations=[\n ner_annotation,\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_radio_annotation,\n nested_checklist_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label = []\n", + "label.append(\n", + " lb_types.Label(\n", + " data=lb_types.ConversationData(global_key=global_key),\n", + " annotations=[\n", + " ner_annotation,\n", + " text_annotation,\n", + " checklist_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " 
nested_checklist_annotation,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "7.5 Upload annotations to the project using Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"text_label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"text_label_import_job\" + str(uuid.uuid4()),\n", + " labels=label,\n", + ")\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.6 Send the annotations to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# get the labels id from the project\n", + "model_run.upsert_labels(project_id=project.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Option deletions for cleanup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - 
"execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/prediction_upload/geospatial_predictions.ipynb b/examples/prediction_upload/geospatial_predictions.ipynb index e3c1212f3..bc589cd81 100644 --- a/examples/prediction_upload/geospatial_predictions.ipynb +++ b/examples/prediction_upload/geospatial_predictions.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Geospatial Prediction Import \n", @@ -49,174 +47,693 @@ "\n", "\n", "Please note that this list of unsupported annotations only refers to limitations for importing annotations. 
For example, when using the Labelbox editor, segmentation masks can be created and edited on video assets.\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import os\n\nimport uuid\nimport numpy as np\nfrom PIL import Image\nimport cv2\n\nimport labelbox as lb\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import os\n", + "\n", + "import uuid\n", + "import numpy as np\n", + "from PIL import Image\n", + "import cv2\n", + "\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions\n", "- Each cell shows the python annotation and the NDJson annotation for each annotation type." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "####### Point #######\n\n# Python Annotation\npoint_prediction = lb_types.ObjectAnnotation(\n name=\"point_geo\",\n confidence=0.4,\n value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n)\n\n# NDJSON\npoint_prediction_ndjson = {\n \"name\": \"point_geo\",\n \"confidence\": 0.4,\n \"point\": {\n \"x\": -99.20647859573366,\n \"y\": 19.40018029091072\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Point #######\n", + "\n", + "# Python Annotation\n", + "point_prediction = lb_types.ObjectAnnotation(\n", + " name=\"point_geo\",\n", + " confidence=0.4,\n", + " value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n", + ")\n", + "\n", + "# NDJSON\n", + "point_prediction_ndjson = {\n", + " \"name\": \"point_geo\",\n", + " \"confidence\": 0.4,\n", + " \"point\": {\n", + " \"x\": -99.20647859573366,\n", + " \"y\": 19.40018029091072\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Polyline #######\n# Coordinates\ncoords = [\n [-99.20842051506044, 19.40032196622975],\n [-99.20809864997865, 19.39758963475322],\n [-99.20758366584778, 19.39776167179227],\n [-99.20728325843811, 19.3973265189299],\n]\n\nline_points = []\nline_points_ndjson = []\n\nfor sub in coords:\n line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python Annotation\npolyline_prediction = lb_types.ObjectAnnotation(\n name=\"polyline_geo\",\n confidence=0.5,\n value=lb_types.Line(points=line_points),\n)\n\n# NDJSON\npolyline_prediction_ndjson = {\n \"name\": \"polyline_geo\",\n \"confidence\": 0.5,\n \"line\": line_points_ndjson,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Polyline #######\n", + "# Coordinates\n", + "coords = [\n", + " 
[-99.20842051506044, 19.40032196622975],\n", + " [-99.20809864997865, 19.39758963475322],\n", + " [-99.20758366584778, 19.39776167179227],\n", + " [-99.20728325843811, 19.3973265189299],\n", + "]\n", + "\n", + "line_points = []\n", + "line_points_ndjson = []\n", + "\n", + "for sub in coords:\n", + " line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", + " line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", + "\n", + "# Python Annotation\n", + "polyline_prediction = lb_types.ObjectAnnotation(\n", + " name=\"polyline_geo\",\n", + " confidence=0.5,\n", + " value=lb_types.Line(points=line_points),\n", + ")\n", + "\n", + "# NDJSON\n", + "polyline_prediction_ndjson = {\n", + " \"name\": \"polyline_geo\",\n", + " \"confidence\": 0.5,\n", + " \"line\": line_points_ndjson,\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Polygon #######\n# Coordinates in the desired EPSG coordinate system\ncoords_polygon = [\n [-99.21042680740356, 19.40036244486966],\n [-99.2104160785675, 19.40017017124035],\n [-99.2103409767151, 19.400008256428897],\n [-99.21014785766603, 19.400008256428897],\n [-99.21019077301027, 19.39983622176518],\n [-99.21022295951845, 19.399674306621385],\n [-99.21029806137086, 19.39951239131646],\n [-99.2102873325348, 19.399340356128437],\n [-99.21025514602663, 19.399117722085677],\n [-99.21024441719057, 19.39892544698541],\n [-99.2102336883545, 19.39874329141769],\n [-99.21021223068239, 19.398561135646027],\n [-99.21018004417421, 19.398399219233365],\n [-99.21011567115785, 19.39822718286836],\n [-99.20992255210878, 19.398136104719125],\n [-99.20974016189577, 19.398085505725305],\n [-99.20957922935487, 19.398004547302467],\n [-99.20939683914186, 19.39792358883935],\n [-99.20918226242067, 19.39786286996558],\n [-99.20899987220764, 19.397822390703805],\n [-99.20891404151918, 19.397994427496787],\n [-99.20890331268312, 19.398176583902874],\n [-99.20889258384706, 19.398368859888045],\n [-99.20889258384706, 19.398540896103246],\n 
[-99.20890331268312, 19.39872305189756],\n [-99.20889258384706, 19.39890520748796],\n [-99.20889258384706, 19.39907724313608],\n [-99.20889258384706, 19.399259398329956],\n [-99.20890331268312, 19.399431433603585],\n [-99.20890331268312, 19.39961358840092],\n [-99.20890331268312, 19.399785623300048],\n [-99.20897841453552, 19.399937418648214],\n [-99.20919299125673, 19.399937418648214],\n [-99.2093861103058, 19.39991717927664],\n [-99.20956850051881, 19.39996777770086],\n [-99.20961141586305, 19.40013981222548],\n [-99.20963287353517, 19.40032196622975],\n [-99.20978307724, 19.4004130431554],\n [-99.20996546745302, 19.40039280384301],\n [-99.21019077301027, 19.400372564528084],\n [-99.21042680740356, 19.40036244486966],\n]\n\npolygon_points = []\npolygon_points_ndjson = []\n\nfor sub in coords_polygon:\n polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python Annotation\npolygon_prediction = lb_types.ObjectAnnotation(\n name=\"polygon_geo\",\n confidence=0.5,\n value=lb_types.Polygon(points=polygon_points),\n)\n\n# NDJSON\npolygon_prediction_ndjson = {\n \"name\": \"polygon_geo\",\n \"confidence\": 0.5,\n \"polygon\": polygon_points_ndjson,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Polygon #######\n", + "# Coordinates in the desired EPSG coordinate system\n", + "coords_polygon = [\n", + " [-99.21042680740356, 19.40036244486966],\n", + " [-99.2104160785675, 19.40017017124035],\n", + " [-99.2103409767151, 19.400008256428897],\n", + " [-99.21014785766603, 19.400008256428897],\n", + " [-99.21019077301027, 19.39983622176518],\n", + " [-99.21022295951845, 19.399674306621385],\n", + " [-99.21029806137086, 19.39951239131646],\n", + " [-99.2102873325348, 19.399340356128437],\n", + " [-99.21025514602663, 19.399117722085677],\n", + " [-99.21024441719057, 19.39892544698541],\n", + " [-99.2102336883545, 
19.39874329141769],\n", + " [-99.21021223068239, 19.398561135646027],\n", + " [-99.21018004417421, 19.398399219233365],\n", + " [-99.21011567115785, 19.39822718286836],\n", + " [-99.20992255210878, 19.398136104719125],\n", + " [-99.20974016189577, 19.398085505725305],\n", + " [-99.20957922935487, 19.398004547302467],\n", + " [-99.20939683914186, 19.39792358883935],\n", + " [-99.20918226242067, 19.39786286996558],\n", + " [-99.20899987220764, 19.397822390703805],\n", + " [-99.20891404151918, 19.397994427496787],\n", + " [-99.20890331268312, 19.398176583902874],\n", + " [-99.20889258384706, 19.398368859888045],\n", + " [-99.20889258384706, 19.398540896103246],\n", + " [-99.20890331268312, 19.39872305189756],\n", + " [-99.20889258384706, 19.39890520748796],\n", + " [-99.20889258384706, 19.39907724313608],\n", + " [-99.20889258384706, 19.399259398329956],\n", + " [-99.20890331268312, 19.399431433603585],\n", + " [-99.20890331268312, 19.39961358840092],\n", + " [-99.20890331268312, 19.399785623300048],\n", + " [-99.20897841453552, 19.399937418648214],\n", + " [-99.20919299125673, 19.399937418648214],\n", + " [-99.2093861103058, 19.39991717927664],\n", + " [-99.20956850051881, 19.39996777770086],\n", + " [-99.20961141586305, 19.40013981222548],\n", + " [-99.20963287353517, 19.40032196622975],\n", + " [-99.20978307724, 19.4004130431554],\n", + " [-99.20996546745302, 19.40039280384301],\n", + " [-99.21019077301027, 19.400372564528084],\n", + " [-99.21042680740356, 19.40036244486966],\n", + "]\n", + "\n", + "polygon_points = []\n", + "polygon_points_ndjson = []\n", + "\n", + "for sub in coords_polygon:\n", + " polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", + " polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", + "\n", + "# Python Annotation\n", + "polygon_prediction = lb_types.ObjectAnnotation(\n", + " name=\"polygon_geo\",\n", + " confidence=0.5,\n", + " value=lb_types.Polygon(points=polygon_points),\n", + ")\n", + "\n", + "# NDJSON\n", + 
"polygon_prediction_ndjson = {\n", + " \"name\": \"polygon_geo\",\n", + " \"confidence\": 0.5,\n", + " \"polygon\": polygon_points_ndjson,\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Bounding Box #######\ncoord_object = {\n \"coordinates\": [[\n [-99.20746564865112, 19.39799442829336],\n [-99.20746564865112, 19.39925939999194],\n [-99.20568466186523, 19.39925939999194],\n [-99.20568466186523, 19.39799442829336],\n [-99.20746564865112, 19.39799442829336],\n ]]\n}\n\nbbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\nbbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n\n# Python Annotation\nbbox_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_geo\",\n confidence=0.5,\n value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n)\n\n# NDJSON\nbbox_prediction_ndjson = {\n \"name\": \"bbox_geo\",\n \"confidence\": 0.5,\n \"bbox\": {\n \"top\":\n coord_object[\"coordinates\"][0][1][1],\n \"left\":\n coord_object[\"coordinates\"][0][1][0],\n \"height\":\n coord_object[\"coordinates\"][0][3][1] -\n coord_object[\"coordinates\"][0][1][1],\n \"width\":\n coord_object[\"coordinates\"][0][3][0] -\n coord_object[\"coordinates\"][0][1][0],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Bounding Box #######\n", + "coord_object = {\n", + " \"coordinates\": [[\n", + " [-99.20746564865112, 19.39799442829336],\n", + " [-99.20746564865112, 19.39925939999194],\n", + " [-99.20568466186523, 19.39925939999194],\n", + " [-99.20568466186523, 19.39799442829336],\n", + " [-99.20746564865112, 19.39799442829336],\n", + " ]]\n", + "}\n", + "\n", + "bbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\n", + "bbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n", + "\n", + "# Python Annotation\n", + "bbox_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bbox_geo\",\n", 
+ " confidence=0.5,\n", + " value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n", + ")\n", + "\n", + "# NDJSON\n", + "bbox_prediction_ndjson = {\n", + " \"name\": \"bbox_geo\",\n", + " \"confidence\": 0.5,\n", + " \"bbox\": {\n", + " \"top\":\n", + " coord_object[\"coordinates\"][0][1][1],\n", + " \"left\":\n", + " coord_object[\"coordinates\"][0][1][0],\n", + " \"height\":\n", + " coord_object[\"coordinates\"][0][3][1] -\n", + " coord_object[\"coordinates\"][0][1][1],\n", + " \"width\":\n", + " coord_object[\"coordinates\"][0][3][0] -\n", + " coord_object[\"coordinates\"][0][1][0],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Classification - radio (single choice) #######\n\n# Python Annotation\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question_geo\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question_geo\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Classification - radio (single choice) #######\n", + "\n", + "# Python Annotation\n", + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question_geo\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\", confidence=0.5)),\n", + ")\n", + "\n", + "# NDJSON\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"radio_question_geo\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Classification - Checklist (multi-choice) #######\n\ncoord_object_checklist = {\n \"coordinates\": [[\n [-99.210266, 19.39540372195134],\n [-99.210266, 19.396901],\n 
[-99.20621067903966, 19.396901],\n [-99.20621067903966, 19.39540372195134],\n [-99.210266, 19.39540372195134],\n ]]\n}\n\n# Python Annotation\nbbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_checklist_geo\",\n confidence=0.5,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_name\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5)\n ]),\n )\n ],\n)\n\n# NDJSON\nbbox_with_checklist_subclass_ndjson = {\n \"name\": \"bbox_checklist_geo\",\n \"confidence\": 0.5,\n \"classifications\": [{\n \"name\": \"checklist_class_name\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n }],\n }],\n \"bbox\": {\n \"top\":\n coord_object_checklist[\"coordinates\"][0][1][1],\n \"left\":\n coord_object_checklist[\"coordinates\"][0][1][0],\n \"height\":\n coord_object_checklist[\"coordinates\"][0][3][1] -\n coord_object_checklist[\"coordinates\"][0][1][1],\n \"width\":\n coord_object_checklist[\"coordinates\"][0][3][0] -\n coord_object_checklist[\"coordinates\"][0][1][0],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Classification - Checklist (multi-choice) #######\n", + "\n", + "coord_object_checklist = {\n", + " \"coordinates\": [[\n", + " [-99.210266, 19.39540372195134],\n", + " [-99.210266, 19.396901],\n", + " [-99.20621067903966, 19.396901],\n", + " [-99.20621067903966, 19.39540372195134],\n", + " [-99.210266, 19.39540372195134],\n", + " ]]\n", + "}\n", + "\n", + "# Python Annotation\n", + "bbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n", + " name=\"bbox_checklist_geo\",\n", + " confidence=0.5,\n", + " value=lb_types.Rectangle(\n", + " 
start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n", + " end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class_name\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5)\n", + " ]),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "# NDJSON\n", + "bbox_with_checklist_subclass_ndjson = {\n", + " \"name\": \"bbox_checklist_geo\",\n", + " \"confidence\": 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"checklist_class_name\",\n", + " \"answer\": [{\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " }],\n", + " }],\n", + " \"bbox\": {\n", + " \"top\":\n", + " coord_object_checklist[\"coordinates\"][0][1][1],\n", + " \"left\":\n", + " coord_object_checklist[\"coordinates\"][0][1][0],\n", + " \"height\":\n", + " coord_object_checklist[\"coordinates\"][0][3][1] -\n", + " coord_object_checklist[\"coordinates\"][0][1][1],\n", + " \"width\":\n", + " coord_object_checklist[\"coordinates\"][0][3][0] -\n", + " coord_object_checklist[\"coordinates\"][0][1][0],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Classification free form text with bbox #######\n\ncoord_object_text = {\n \"coordinates\": [[\n [-99.21019613742828, 19.397447957052933],\n [-99.21019613742828, 19.39772119262215],\n [-99.20986354351044, 19.39772119262215],\n [-99.20986354351044, 19.397447957052933],\n [-99.21019613742828, 19.397447957052933],\n ]]\n}\n# Python Annotation\nbbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_text_geo\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.21019613742828,\n y=19.397447957052933), # Top left\n end=lb_types.Point(x=-99.20986354351044,\n y=19.39772119262215), # Bottom right\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n 
name=\"free_text_geo\", value=lb_types.Text(answer=\"sample text\"))\n ],\n)\n\n# NDJSON\nbbox_with_free_text_subclass_ndjson = {\n \"name\": \"bbox_text_geo\",\n \"confidence\": 0.5,\n \"classifications\": [{\n \"name\": \"free_text_geo\",\n \"confidence\": 0.5,\n \"answer\": \"sample text\"\n }],\n \"bbox\": {\n \"top\":\n coord_object_text[\"coordinates\"][0][1][1],\n \"left\":\n coord_object_text[\"coordinates\"][0][1][0],\n \"height\":\n coord_object_text[\"coordinates\"][0][3][1] -\n coord_object_text[\"coordinates\"][0][1][1],\n \"width\":\n coord_object_text[\"coordinates\"][0][3][0] -\n coord_object_text[\"coordinates\"][0][1][0],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Classification free form text with bbox #######\n", + "\n", + "coord_object_text = {\n", + " \"coordinates\": [[\n", + " [-99.21019613742828, 19.397447957052933],\n", + " [-99.21019613742828, 19.39772119262215],\n", + " [-99.20986354351044, 19.39772119262215],\n", + " [-99.20986354351044, 19.397447957052933],\n", + " [-99.21019613742828, 19.397447957052933],\n", + " ]]\n", + "}\n", + "# Python Annotation\n", + "bbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n", + " name=\"bbox_text_geo\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=-99.21019613742828,\n", + " y=19.397447957052933), # Top left\n", + " end=lb_types.Point(x=-99.20986354351044,\n", + " y=19.39772119262215), # Bottom right\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"free_text_geo\", value=lb_types.Text(answer=\"sample text\"))\n", + " ],\n", + ")\n", + "\n", + "# NDJSON\n", + "bbox_with_free_text_subclass_ndjson = {\n", + " \"name\": \"bbox_text_geo\",\n", + " \"confidence\": 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"free_text_geo\",\n", + " \"confidence\": 0.5,\n", + " \"answer\": \"sample text\"\n", + " }],\n", + " \"bbox\": {\n", 
+ " \"top\":\n", + " coord_object_text[\"coordinates\"][0][1][1],\n", + " \"left\":\n", + " coord_object_text[\"coordinates\"][0][1][0],\n", + " \"height\":\n", + " coord_object_text[\"coordinates\"][0][3][1] -\n", + " coord_object_text[\"coordinates\"][0][1][1],\n", + " \"width\":\n", + " coord_object_text[\"coordinates\"][0][3][0] -\n", + " coord_object_text[\"coordinates\"][0][1][0],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Classification - Checklist (multi-choice) #######\n\n# Python Annotation\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question_geo\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question_geo\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"third_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Classification - Checklist (multi-choice) #######\n", + "\n", + "# Python Annotation\n", + "checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question_geo\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n", + " confidence=0.5),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_prediction_ndjson = {\n", + " 
\"name\":\n", + " \"checklist_question_geo\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"third_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classification - Radio and Checklist (with subclassifications) ##########\n\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.2)),\n )\n ],\n )),\n)\n# NDJSON\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.2,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.3\n },\n }],\n },\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n )\n ]),\n )\n ],\n )\n ]),\n)\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": 
\"first_sub_checklist_answer\",\n \"confidence\": 0.5,\n },\n }],\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Classification - Radio and Checklist (with subclassifications) ##########\n", + "\n", + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5,\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\", confidence=0.2)),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "# NDJSON\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.2,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\",\n", + " \"confidence\": 0.3\n", + " },\n", + " }],\n", + " },\n", + "}\n", + "\n", + "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5,\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=0.5,\n", + " )\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " 
\"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\": 0.5,\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "top_left_bound = lb_types.Point(x=-99.21052827588443, y=19.400498983095076)\nbottom_right_bound = lb_types.Point(x=-99.20534818927473, y=19.39533555271248)\n\nepsg = lb_types.EPSG.EPSG4326\nbounds = lb_types.TiledBounds(epsg=epsg,\n bounds=[top_left_bound, bottom_right_bound])\nglobal_key = \"mexico_city\"\n\ntile_layer = lb_types.TileLayer(\n url=\n \"https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png\"\n)\n\ntiled_image_data = lb_types.TiledImageData(tile_layer=tile_layer,\n tile_bounds=bounds,\n zoom_levels=[17, 23])\n\nasset = {\n \"row_data\": tiled_image_data.asdict(),\n \"global_key\": global_key,\n \"media_type\": \"TMS_GEO\",\n}\n\ndataset = client.create_dataset(name=\"geo_demo_dataset\")\ntask = dataset.create_data_rows([asset])\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "top_left_bound = lb_types.Point(x=-99.21052827588443, y=19.400498983095076)\n", + "bottom_right_bound = lb_types.Point(x=-99.20534818927473, y=19.39533555271248)\n", + "\n", + "epsg = lb_types.EPSG.EPSG4326\n", + "bounds = lb_types.TiledBounds(epsg=epsg,\n", + " bounds=[top_left_bound, bottom_right_bound])\n", + "global_key = \"mexico_city\" + uuid.uuid4()\n", + "\n", + "tile_layer = lb_types.TileLayer(\n", + " url=\n", + " \"https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png\"\n", + ")\n", + "\n", + 
"tiled_image_data = lb_types.TiledImageData(tile_layer=tile_layer,\n", + " tile_bounds=bounds,\n", + " zoom_levels=[17, 23])\n", + "\n", + "asset = {\n", + " \"row_data\": tiled_image_data.asdict(),\n", + " \"global_key\": global_key,\n", + " \"media_type\": \"TMS_GEO\",\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"geo_demo_dataset\")\n", + "task = dataset.create_data_rows([asset])\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows:\", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_geo\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline_geo\"),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo\"),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo_2\"),\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_geo\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_checklist_geo\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class_name\",\n options=[lb.Option(value=\"first_checklist_answer\")],\n ),\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_text_geo\",\n classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text_geo\"),\n ],\n ),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question_geo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n 
lb.Option(value=\"second_checklist_answer\"),\n lb.Option(value=\"third_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question_geo\",\n options=[lb.Option(value=\"first_radio_answer\")],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Ontology Geospatial Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Geospatial_Tile,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " tools=[\n", + " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_geo\"),\n", + " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline_geo\"),\n", + " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo\"),\n", + " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo_2\"),\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_geo\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_checklist_geo\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_class_name\",\n", + " options=[lb.Option(value=\"first_checklist_answer\")],\n", + " ),\n", + " ],\n", + " ),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " 
name=\"bbox_text_geo\",\n", + " classifications=[\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text_geo\"),\n", + " ],\n", + " ),\n", + " ],\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question_geo\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " lb.Option(value=\"third_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question_geo\",\n", + " options=[lb.Option(value=\"first_radio_answer\")],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", + " ),\n", + " ],\n", + " ),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Ontology Geospatial Annotations\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Geospatial_Tile,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# create 
Model\nmodel = client.create_model(name=\"geospatial_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# create Model\n", + "model = client.create_model(name=\"geospatial_model_run_\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid)\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", @@ -224,156 +741,420 @@ "Create the annotations payload using the snippets in the **Supported Predictions Section**. 
\n", "\n", "The resulting label_ndjson should have exactly the same content for annotations that are supported by both" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "## Lets create another polygon annotation with python annotation tools that draws the image using cv2 and PIL python libraries\n\nhsv = cv2.cvtColor(tiled_image_data.value, cv2.COLOR_RGB2HSV)\nmask = cv2.inRange(hsv, (25, 50, 25), (100, 150, 255))\nkernel = np.ones((15, 20), np.uint8)\nmask = cv2.erode(mask, kernel)\nmask = cv2.dilate(mask, kernel)\nmask_annotation = lb_types.MaskData.from_2D_arr(mask)\nmask_data = lb_types.Mask(mask=mask_annotation, color=[255, 255, 255])\nh, w, _ = tiled_image_data.value.shape\npixel_bounds = lb_types.TiledBounds(\n epsg=lb_types.EPSG.SIMPLEPIXEL,\n bounds=[lb_types.Point(x=0, y=0),\n lb_types.Point(x=w, y=h)],\n)\ntransformer = lb_types.EPSGTransformer.create_pixel_to_geo_transformer(\n src_epsg=pixel_bounds.epsg,\n pixel_bounds=pixel_bounds,\n geo_bounds=tiled_image_data.tile_bounds,\n zoom=23,\n)\npixel_polygons = mask_data.shapely.simplify(3)\nlist_of_polygons = [\n transformer(lb_types.Polygon.from_shapely(p)) for p in pixel_polygons.geoms\n]\npolygon_prediction_two = lb_types.ObjectAnnotation(value=list_of_polygons[0],\n name=\"polygon_geo_2\",\n confidence=0.5)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "## Lets create another polygon annotation with python annotation tools that draws the image using cv2 and PIL python libraries\n", + "\n", + "hsv = cv2.cvtColor(tiled_image_data.value, cv2.COLOR_RGB2HSV)\n", + "mask = cv2.inRange(hsv, (25, 50, 25), (100, 150, 255))\n", + "kernel = np.ones((15, 20), np.uint8)\n", + "mask = cv2.erode(mask, kernel)\n", + "mask = cv2.dilate(mask, kernel)\n", + "mask_annotation = lb_types.MaskData.from_2D_arr(mask)\n", + "mask_data = lb_types.Mask(mask=mask_annotation, color=[255, 255, 255])\n", + "h, w, _ = 
tiled_image_data.value.shape\n", + "pixel_bounds = lb_types.TiledBounds(\n", + " epsg=lb_types.EPSG.SIMPLEPIXEL,\n", + " bounds=[lb_types.Point(x=0, y=0),\n", + " lb_types.Point(x=w, y=h)],\n", + ")\n", + "transformer = lb_types.EPSGTransformer.create_pixel_to_geo_transformer(\n", + " src_epsg=pixel_bounds.epsg,\n", + " pixel_bounds=pixel_bounds,\n", + " geo_bounds=tiled_image_data.tile_bounds,\n", + " zoom=23,\n", + ")\n", + "pixel_polygons = mask_data.shapely.simplify(3)\n", + "list_of_polygons = [\n", + " transformer(lb_types.Polygon.from_shapely(p)) for p in pixel_polygons.geoms\n", + "]\n", + "polygon_prediction_two = lb_types.ObjectAnnotation(value=list_of_polygons[0],\n", + " name=\"polygon_geo_2\",\n", + " confidence=0.5)" + ] }, { - "metadata": {}, - "source": "labels = []\nlabels.append(\n lb_types.Label(\n data={\n \"global_key\": global_key,\n \"tile_layer\": tile_layer,\n \"tile_bounds\": bounds,\n \"zoom_levels\": [12, 20],\n },\n annotations=[\n point_prediction,\n polyline_prediction,\n polygon_prediction,\n bbox_prediction,\n radio_prediction,\n bbox_with_checklist_subclass,\n bbox_with_free_text_subclass,\n checklist_prediction,\n polygon_prediction_two,\n nested_checklist_prediction,\n nested_radio_prediction,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "labels = []\n", + "labels.append(\n", + " lb_types.Label(\n", + " data={\n", + " \"global_key\": global_key,\n", + " \"tile_layer\": tile_layer,\n", + " \"tile_bounds\": bounds,\n", + " \"zoom_levels\": [12, 20],\n", + " },\n", + " annotations=[\n", + " point_prediction,\n", + " polyline_prediction,\n", + " polygon_prediction,\n", + " bbox_prediction,\n", + " radio_prediction,\n", + " bbox_with_checklist_subclass,\n", + " bbox_with_free_text_subclass,\n", + " checklist_prediction,\n", + " polygon_prediction_two,\n", + " nested_checklist_prediction,\n", + " nested_radio_prediction,\n", + " ],\n", + " ))" + ] 
}, { + "cell_type": "markdown", "metadata": {}, "source": [ "# If using NDJSON" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson = []\nfor prediction in [\n radio_prediction_ndjson,\n checklist_prediction_ndjson,\n bbox_with_free_text_subclass_ndjson,\n bbox_with_checklist_subclass_ndjson,\n bbox_prediction_ndjson,\n point_prediction_ndjson,\n polyline_prediction_ndjson,\n polygon_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n nested_radio_prediction_ndjson,\n]:\n prediction.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_ndjson.append(prediction)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_ndjson = []\n", + "for prediction in [\n", + " radio_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " bbox_with_free_text_subclass_ndjson,\n", + " bbox_with_checklist_subclass_ndjson,\n", + " bbox_prediction_ndjson,\n", + " point_prediction_ndjson,\n", + " polyline_prediction_ndjson,\n", + " polygon_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson,\n", + " nested_radio_prediction_ndjson,\n", + "]:\n", + " prediction.update({\n", + " \"dataRow\": {\n", + " \"globalKey\": global_key\n", + " },\n", + " })\n", + " label_ndjson.append(prediction)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. 
Upload the predictions payload to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(name=\"prediction_upload_job\" +\n str(uuid.uuid4()),\n predictions=labels)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(name=\"prediction_upload_job\" +\n", + " str(uuid.uuid4()),\n", + " predictions=labels)\n", + "\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)\n", + "print(\"Status of uploads: \", upload_job_prediction.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run \n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. 
Create a labelbox project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"geospatial_prediction_demo\",\n media_type=lb.MediaType.Geospatial_Tile)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Labelbox project\n", + "project = client.create_project(name=\"geospatial_prediction_demo\",\n", + " media_type=lb.MediaType.Geospatial_Tile)\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. Create a batch to send to the project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_geospatial_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[global_key], # A list of data rows or data row ids\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_geospatial_prediction_demo\", # Each batch in a project must have a unique name\n", + " global_keys=[global_key], # A list of data rows or data row ids\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "####### Point #######\n\n# Python Annotation\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point_geo\",\n value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n)\n\n####### Polyline #######\nline_points = []\nline_points_ndjson = []\n\nfor sub in coords:\n line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python 
Annotation\npolyline_annotation = lb_types.ObjectAnnotation(\n name=\"polyline_geo\",\n value=lb_types.Line(points=line_points),\n)\n\npolygon_points = []\npolygon_points_ndjson = []\n\nfor sub in coords_polygon:\n polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python Annotation\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon_geo\",\n value=lb_types.Polygon(points=polygon_points),\n)\n\nbbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\nbbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n\n# Python Annotation\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_geo\",\n value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n)\n\n# Python Annotation\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question_geo\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\n# Python Annotation\nbbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_checklist_geo\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_name\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n )\n ],\n)\n\nbbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_text_geo\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.21019613742828,\n y=19.397447957052933), # Top left\n end=lb_types.Point(x=-99.20986354351044,\n y=19.39772119262215), # Bottom right\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"free_text_geo\", value=lb_types.Text(answer=\"sample text\"))\n ],\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n 
name=\"checklist_question_geo\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Point #######\n", + "\n", + "# Python Annotation\n", + "point_annotation = lb_types.ObjectAnnotation(\n", + " name=\"point_geo\",\n", + " value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n", + ")\n", + "\n", + "####### Polyline #######\n", + "line_points = []\n", + "line_points_ndjson = []\n", + "\n", + "for sub in coords:\n", + " line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", + " line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", + "\n", + "# Python Annotation\n", + "polyline_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polyline_geo\",\n", + " value=lb_types.Line(points=line_points),\n", + ")\n", + "\n", + "polygon_points = []\n", + "polygon_points_ndjson = []\n", + "\n", + "for sub in 
coords_polygon:\n", + " polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", + " polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", + "\n", + "# Python Annotation\n", + "polygon_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polygon_geo\",\n", + " value=lb_types.Polygon(points=polygon_points),\n", + ")\n", + "\n", + "bbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\n", + "bbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n", + "\n", + "# Python Annotation\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_geo\",\n", + " value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n", + ")\n", + "\n", + "# Python Annotation\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question_geo\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + ")\n", + "\n", + "# Python Annotation\n", + "bbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n", + " name=\"bbox_checklist_geo\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n", + " end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class_name\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "bbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n", + " name=\"bbox_text_geo\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=-99.21019613742828,\n", + " y=19.397447957052933), # Top left\n", + " end=lb_types.Point(x=-99.20986354351044,\n", + " y=19.39772119262215), # Bottom right\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"free_text_geo\", 
value=lb_types.Text(answer=\"sample text\"))\n", + " ],\n", + ")\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question_geo\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "labels = []\nlabels.append(\n lb_types.Label(\n data=lb_types.TiledImageData(\n global_key=global_key,\n tile_layer=tile_layer,\n tile_bounds=bounds,\n zoom_levels=[12, 20],\n ),\n annotations=[\n point_annotation,\n polyline_annotation,\n polygon_annotation,\n bbox_annotation,\n radio_annotation,\n bbox_with_checklist_subclass,\n bbox_with_free_text_subclass,\n checklist_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "labels = []\n", + "labels.append(\n", + " lb_types.Label(\n", + " data=lb_types.TiledImageData(\n", + " global_key=global_key,\n", + " tile_layer=tile_layer,\n", + " tile_bounds=bounds,\n", + " zoom_levels=[12, 20],\n", + " ),\n", + " annotations=[\n", + " point_annotation,\n", + " polyline_annotation,\n", + " polygon_annotation,\n", + " bbox_annotation,\n", + " radio_annotation,\n", + " bbox_with_checklist_subclass,\n", + " bbox_with_free_text_subclass,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " nested_radio_annotation,\n", + " ],\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. 
Upload annotations to the project using Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"geospatial_annotations_import_\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"geospatial_annotations_import_\" + str(uuid.uuid4()),\n", + " labels=labels,\n", + ")\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6. 
Send the annotations to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# get the labels id from the project\n", + "model_run.upsert_labels(project_id=project.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# upload_job\n# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# upload_job\n", + "# project.delete()\n", + "# dataset.delete()" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/prediction_upload/html_predictions.ipynb b/examples/prediction_upload/html_predictions.ipynb index 14e5510a1..0caa5fdd1 100644 --- a/examples/prediction_upload/html_predictions.ipynb +++ b/examples/prediction_upload/html_predictions.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# HTML Prediction Import\n", @@ -48,138 +46,363 @@ "- NER\n", "\n", "A Model Run is a container for the predictions, annotations and metrics of a specific experiment in your ML model development cycle." 
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid\nimport numpy as np", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "import uuid\n", + "import numpy as np" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########### Radio Classification ###########\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\", # Should match the name in the ontology\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n)\n\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########### Radio Classification ###########\n", + "radio_prediction = 
lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\", # Should match the name in the ontology\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\", confidence=0.5)),\n", + ")\n", + "\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "#### Nested Classifications ######\n\n# Python annotation\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n 
\"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5,\n },\n }],\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "#### Nested Classifications ######\n", + "\n", + "# Python annotation\n", + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\", confidence=0.5)),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " }],\n", + " },\n", + "}\n", + "\n", + "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " 
lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=0.5,\n", + " )\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\": 0.5,\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Checklist ##########\n\n# Python annotation\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\": \"checklist_question\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Checklist ##########\n", + "\n", + "# Python annotation\n", + "checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n", + " confidence=0.5),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", 
+ "checklist_prediction_ndjson = {\n", + " \"name\": \"checklist_question\",\n", + " \"answer\": [{\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classification Free-Form text ##########\n## Text classifications do not support confidence values\n# Python annotation\ntext_prediction = lb_types.ClassificationAnnotation(name=\"free_text\",\n value=lb_types.Text(\n answer=\"sample text\",\n confidence=0.5))\n\n# NDJSON\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Classification Free-Form text ##########\n", + "## Text classifications do not support confidence values\n", + "# Python annotation\n", + "text_prediction = lb_types.ClassificationAnnotation(name=\"free_text\",\n", + " value=lb_types.Text(\n", + " answer=\"sample text\",\n", + " confidence=0.5))\n", + "\n", + "# NDJSON\n", + "text_prediction_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + " \"confidence\": 0.5,\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# send a sample image as batch to the project\nglobal_key = \"sample_html_2.html\"\n\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_2.html\",\n \"global_key\":\n global_key,\n}\ndataset = client.create_dataset(\n name=\"html prediction demo dataset\",\n iam_integration=\n None, # Removing this argument will default to the organziation's default iam integration\n)\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", 
task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# send a sample image as batch to the project\n", + "global_key = \"sample_html_2.html\" + str(uuid.uuid4())\n", + "\n", + "test_img_url = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_2.html\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "dataset = client.create_dataset(\n", + " name=\"html prediction demo dataset\",\n", + " iam_integration=\n", + " None, # Removing this argument will default to the organziation's default iam integration\n", + ")\n", + "task = dataset.create_data_rows([test_img_url])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows:\", task.failed_data_rows)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names should match the name field in your annotations to ensure the correct feature schemas are matched.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\n \"radio_question\", # name matching the tool used in the annotation\n options=[lb.Option(value=\"first_radio_answer\")],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n 
),\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n lb.Option(value=\"third_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ])\n\nontology = client.create_ontology(\n \"Ontology HTML Predictions\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Html,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "## Setup the ontology and link the tools created above.\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\n", + " \"radio_question\", # name matching the tool used in the annotation\n", + " options=[lb.Option(value=\"first_radio_answer\")],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", + " ),\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " options=[\n", + " 
lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " lb.Option(value=\"third_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\"),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ])\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Ontology HTML Predictions\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Html,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# create Model\nmodel = client.create_model(name=\"HTML_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# create Model\n", + "model = client.create_model(name=\"HTML_model_run_\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid)\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + 
"model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", @@ -189,149 +412,290 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting label_ndjson should have exactly the same content for annotations that are supported by both" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Label for predictions\nlabel_prediction = []\nlabel_prediction.append(\n lb_types.Label(\n data=lb_types.HTMLData(global_key=global_key),\n annotations=[\n radio_prediction,\n checklist_prediction,\n text_prediction,\n nested_checklist_prediction,\n nested_radio_prediction,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# Create a Label for predictions\n", + "label_prediction = []\n", + "label_prediction.append(\n", + " lb_types.Label(\n", + " data=lb_types.HTMLData(global_key=global_key),\n", + " annotations=[\n", + " radio_prediction,\n", + " checklist_prediction,\n", + " text_prediction,\n", + " nested_checklist_prediction,\n", + " nested_radio_prediction,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON: " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_prediction_ndjson = []\nfor annot in [\n radio_prediction_ndjson,\n nested_radio_prediction_ndjson,\n checklist_prediction_ndjson,\n text_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n]:\n annot.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_prediction_ndjson.append(annot)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + 
"label_prediction_ndjson = []\n", + "for annot in [\n", + " radio_prediction_ndjson,\n", + " nested_radio_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " text_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson,\n", + "]:\n", + " annot.update({\n", + " \"dataRow\": {\n", + " \"globalKey\": global_key\n", + " },\n", + " })\n", + " label_prediction_ndjson.append(annot)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. Upload the predictions payload to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", + " predictions=label_prediction,\n", + ")\n", + "\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)\n", + "print(\"Status of uploads: \", upload_job_prediction.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run \n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. 
Create a labelbox project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"HTML prediction import demo\",\n media_type=lb.MediaType.Html)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Labelbox project\n", + "project = client.create_project(name=\"HTML prediction import demo\",\n", + " media_type=lb.MediaType.Html)\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. Create a batch to send to the project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_prediction_html\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_prediction_html\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "###### Annotations ######\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",)\n ]),\n )\n ],\n )\n ]),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\",),\n ]),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "###### Annotations ######\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " 
name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",)\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",),\n", + " lb_types.ClassificationAnswer(name=\"third_checklist_answer\",),\n", + " ]),\n", + ")\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label = []\n", + "label.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " text_annotation,\n", + " checklist_annotation,\n", + " radio_annotation,\n", + " nested_checklist_annotation,\n", + " nested_radio_annotation,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. Upload annotations to the project using Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"html_annotation_import\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"html_annotation_import\" + str(uuid.uuid4()),\n", + " labels=label,\n", + ")\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ 
"##### 7.6 Send the annotations to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# get the labels id from the project\n", + "model_run.upsert_labels(project_id=project.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/prediction_upload/image_predictions.ipynb b/examples/prediction_upload/image_predictions.ipynb index b28c9f78f..372881bb8 100644 --- a/examples/prediction_upload/image_predictions.ipynb +++ b/examples/prediction_upload/image_predictions.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Image Prediction Import\n", @@ -48,273 +46,869 @@ "- Classification - checklist\n", "\n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* Notes:\n", " * If you are importing more than 1,000 mask predictions at a time, consider submitting separate jobs, as they can take longer than other prediction types 
to import.\n", " * After the execution of this notebook a complete Model Run with predictions will be created in your organization. " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "import requests\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Radio (single-choice)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python annotation\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\", confidence=0.5)),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"second_radio_answer\",\n 
\"confidence\": 0.5\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\", confidence=0.5)),\n", + ")\n", + "\n", + "# NDJSON\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"second_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Nested radio and checklist" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "nested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n )\n ]),\n )\n 
],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5,\n },\n }],\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5,\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\", confidence=0.5)),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " }],\n", + " },\n", + "}\n", + "\n", + "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5,\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=0.5,\n", + " )\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " 
]),\n", + ")\n", + "\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\": 0.5,\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Checklist (multi-choice)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Annotations\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Annotations\n", + "checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"checklist_question\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " {\n", + " \"name\": 
\"second_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding Box" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Annotation\nbbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n confidence=0.5,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\n# NDJSON\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"confidence\": 0.5,\n \"bbox\": {\n \"top\": 977,\n \"left\": 1690,\n \"height\": 330,\n \"width\": 225\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Annotation\n", + "bbox_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " confidence=0.5,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", + " end=lb_types.Point(x=1915,\n", + " y=1307), # x= left + width , y = top + height\n", + " ),\n", + ")\n", + "\n", + "# NDJSON\n", + "bbox_prediction_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"confidence\": 0.5,\n", + " \"bbox\": {\n", + " \"top\": 977,\n", + " \"left\": 1690,\n", + " \"height\": 330,\n", + " \"width\": 225\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box with nested classification " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "####### Bounding box with nested classification #######\nbbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n 
lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n)\n\n## NDJSON\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"confidence\": 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n \"bbox\": {\n \"top\": 933,\n \"left\": 541,\n \"height\": 191,\n \"width\": 330\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Bounding box with nested classification #######\n", + "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " confidence=0.5,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", + " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\", confidence=0.5)),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "## NDJSON\n", + "bbox_with_radio_subclass_prediction_ndjson = {\n", + " \"name\": \"bbox_with_radio_subclass\",\n", + " \"confidence\": 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " }],\n", + " \"bbox\": {\n", + " \"top\": 933,\n", + " \"left\": 541,\n", + " \"height\": 191,\n", + " \"width\": 330\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Polygon" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python 
Anotation\npolygon_prediction = lb_types.ObjectAnnotation(\n name=\"polygon\",\n confidence=0.5,\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\n# NDJSON\n\npolygon_prediction_ndjson = {\n \"name\":\n \"polygon\",\n \"confidence\":\n 0.5,\n \"polygon\": [\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n {\n \"x\": 2278.306,\n \"y\": 256.885\n },\n {\n \"x\": 2428.197,\n \"y\": 200.437\n },\n {\n \"x\": 2560.0,\n \"y\": 335.419\n },\n {\n \"x\": 2557.386,\n \"y\": 503.165\n },\n {\n \"x\": 2320.596,\n \"y\": 503.103\n },\n {\n \"x\": 2156.083,\n \"y\": 628.943\n },\n {\n \"x\": 2161.111,\n \"y\": 785.519\n },\n {\n \"x\": 2002.115,\n \"y\": 894.647\n },\n {\n \"x\": 1838.456,\n \"y\": 877.874\n },\n {\n \"x\": 1436.53,\n \"y\": 874.636\n },\n {\n \"x\": 1411.403,\n \"y\": 758.579\n },\n {\n \"x\": 1353.853,\n \"y\": 751.74\n },\n {\n \"x\": 1345.264,\n \"y\": 453.461\n },\n {\n \"x\": 1426.011,\n \"y\": 421.129\n },\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Anotation\n", + "polygon_prediction = lb_types.ObjectAnnotation(\n", + " name=\"polygon\",\n", + " confidence=0.5,\n", + " value=lb_types.Polygon(points=[\n", + " lb_types.Point(x=1489.581, y=183.934),\n", + " lb_types.Point(x=2278.306, y=256.885),\n", + " 
lb_types.Point(x=2428.197, y=200.437),\n", + " lb_types.Point(x=2560.0, y=335.419),\n", + " lb_types.Point(x=2557.386, y=503.165),\n", + " lb_types.Point(x=2320.596, y=503.103),\n", + " lb_types.Point(x=2156.083, y=628.943),\n", + " lb_types.Point(x=2161.111, y=785.519),\n", + " lb_types.Point(x=2002.115, y=894.647),\n", + " lb_types.Point(x=1838.456, y=877.874),\n", + " lb_types.Point(x=1436.53, y=874.636),\n", + " lb_types.Point(x=1411.403, y=758.579),\n", + " lb_types.Point(x=1353.853, y=751.74),\n", + " lb_types.Point(x=1345.264, y=453.461),\n", + " lb_types.Point(x=1426.011, y=421.129),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "\n", + "polygon_prediction_ndjson = {\n", + " \"name\":\n", + " \"polygon\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"polygon\": [\n", + " {\n", + " \"x\": 1489.581,\n", + " \"y\": 183.934\n", + " },\n", + " {\n", + " \"x\": 2278.306,\n", + " \"y\": 256.885\n", + " },\n", + " {\n", + " \"x\": 2428.197,\n", + " \"y\": 200.437\n", + " },\n", + " {\n", + " \"x\": 2560.0,\n", + " \"y\": 335.419\n", + " },\n", + " {\n", + " \"x\": 2557.386,\n", + " \"y\": 503.165\n", + " },\n", + " {\n", + " \"x\": 2320.596,\n", + " \"y\": 503.103\n", + " },\n", + " {\n", + " \"x\": 2156.083,\n", + " \"y\": 628.943\n", + " },\n", + " {\n", + " \"x\": 2161.111,\n", + " \"y\": 785.519\n", + " },\n", + " {\n", + " \"x\": 2002.115,\n", + " \"y\": 894.647\n", + " },\n", + " {\n", + " \"x\": 1838.456,\n", + " \"y\": 877.874\n", + " },\n", + " {\n", + " \"x\": 1436.53,\n", + " \"y\": 874.636\n", + " },\n", + " {\n", + " \"x\": 1411.403,\n", + " \"y\": 758.579\n", + " },\n", + " {\n", + " \"x\": 1353.853,\n", + " \"y\": 751.74\n", + " },\n", + " {\n", + " \"x\": 1345.264,\n", + " \"y\": 453.461\n", + " },\n", + " {\n", + " \"x\": 1426.011,\n", + " \"y\": 421.129\n", + " },\n", + " {\n", + " \"x\": 1489.581,\n", + " \"y\": 183.934\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: 
Free-form text" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python annotation\ntext_annotation = lb_types.ClassificationAnnotation(name=\"free_text\",\n value=lb_types.Text(\n answer=\"sample text\",\n confidence=0.5))\n\n# NDJSON\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "text_annotation = lb_types.ClassificationAnnotation(name=\"free_text\",\n", + " value=lb_types.Text(\n", + " answer=\"sample text\",\n", + " confidence=0.5))\n", + "\n", + "# NDJSON\n", + "text_annotation_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + " \"confidence\": 0.5,\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Segmentation mask" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "### Raster Segmentation (Byte string array)\nurl = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/raster_seg.png\"\nresponse = requests.get(url)\n\nmask_data = lb.types.MaskData(\n im_bytes=response.content\n) # You can also use \"url\" instead of img_bytes to pass the PNG mask url.\nmask_prediction = lb_types.ObjectAnnotation(name=\"mask\",\n value=lb_types.Mask(mask=mask_data,\n color=(255, 255,\n 255)))\n\n# NDJSON using instanceURI, bytes array is not fully supported.\nmask_prediction_ndjson = {\n \"name\": \"mask\",\n \"classifications\": [],\n \"mask\": {\n \"instanceURI\": url,\n \"colorRGB\": (255, 255, 255)\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "### Raster Segmentation (Byte string array)\n", + "url = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/raster_seg.png\"\n", + "response = requests.get(url)\n", + "\n", + "mask_data = 
lb.types.MaskData(\n", + " im_bytes=response.content\n", + ") # You can also use \"url\" instead of img_bytes to pass the PNG mask url.\n", + "mask_prediction = lb_types.ObjectAnnotation(name=\"mask\",\n", + " value=lb_types.Mask(mask=mask_data,\n", + " color=(255, 255,\n", + " 255)))\n", + "\n", + "# NDJSON using instanceURI, bytes array is not fully supported.\n", + "mask_prediction_ndjson = {\n", + " \"name\": \"mask\",\n", + " \"classifications\": [],\n", + " \"mask\": {\n", + " \"instanceURI\": url,\n", + " \"colorRGB\": (255, 255, 255)\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Segmentation mask with nested classification" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "url_2 = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/raster_seg_with_subclass.png\"\nresponse_2 = requests.get(url_2)\nmask_data_2 = lb_types.MaskData(im_bytes=response_2.content)\n\n# Python annotation\nmask_with_text_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"mask_with_text_subclass\", # must match your ontology feature\"s name\n value=lb_types.Mask(mask=mask_data_2, color=(255, 255, 255)),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_free_text\",\n value=lb_types.Text(answer=\"free text answer\"))\n ],\n)\n\n# NDJSON using instanceURI, bytes array is not fully supported.\nmask_with_text_subclass_prediction_ndjson = {\n \"name\":\n \"mask_with_text_subclass\",\n \"mask\": {\n \"instanceURI\": url_2,\n \"colorRGB\": (255, 255, 255)\n },\n \"classifications\": [{\n \"name\": \"sub_free_text\",\n \"answer\": \"free text answer\"\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "url_2 = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/raster_seg_with_subclass.png\"\n", + "response_2 = requests.get(url_2)\n", + "mask_data_2 = 
lb_types.MaskData(im_bytes=response_2.content)\n", + "\n", + "# Python annotation\n", + "mask_with_text_subclass_prediction = lb_types.ObjectAnnotation(\n", + " name=\"mask_with_text_subclass\", # must match your ontology feature\"s name\n", + " value=lb_types.Mask(mask=mask_data_2, color=(255, 255, 255)),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_free_text\",\n", + " value=lb_types.Text(answer=\"free text answer\"))\n", + " ],\n", + ")\n", + "\n", + "# NDJSON using instanceURI, bytes array is not fully supported.\n", + "mask_with_text_subclass_prediction_ndjson = {\n", + " \"name\":\n", + " \"mask_with_text_subclass\",\n", + " \"mask\": {\n", + " \"instanceURI\": url_2,\n", + " \"colorRGB\": (255, 255, 255)\n", + " },\n", + " \"classifications\": [{\n", + " \"name\": \"sub_free_text\",\n", + " \"answer\": \"free text answer\"\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Point" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Annotation\npoint_prediction = lb_types.ObjectAnnotation(\n name=\"point\",\n confidence=0.5,\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\n# NDJSON\npoint_prediction_ndjson = {\n \"name\": \"point\",\n \"confidence\": 0.5,\n \"classifications\": [],\n \"point\": {\n \"x\": 1166.606,\n \"y\": 1441.768\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Annotation\n", + "point_prediction = lb_types.ObjectAnnotation(\n", + " name=\"point\",\n", + " confidence=0.5,\n", + " value=lb_types.Point(x=1166.606, y=1441.768),\n", + ")\n", + "\n", + "# NDJSON\n", + "point_prediction_ndjson = {\n", + " \"name\": \"point\",\n", + " \"confidence\": 0.5,\n", + " \"classifications\": [],\n", + " \"point\": {\n", + " \"x\": 1166.606,\n", + " \"y\": 1441.768\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ 
"### Polyline" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Annotation\n\npolyline_prediction = lb_types.ObjectAnnotation(\n name=\"polyline\",\n confidence=0.5,\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)\n\n# NDJSON\npolyline_prediction_ndjson = {\n \"name\":\n \"polyline\",\n \"confidence\":\n 0.5,\n \"classifications\": [],\n \"line\": [\n {\n \"x\": 2534.353,\n \"y\": 249.471\n },\n {\n \"x\": 2429.492,\n \"y\": 182.092\n },\n {\n \"x\": 2294.322,\n \"y\": 221.962\n },\n {\n \"x\": 2224.491,\n \"y\": 180.463\n },\n {\n \"x\": 2136.123,\n \"y\": 204.716\n },\n {\n \"x\": 1712.247,\n \"y\": 173.949\n },\n {\n \"x\": 1703.838,\n \"y\": 84.438\n },\n {\n \"x\": 1579.772,\n \"y\": 82.61\n },\n {\n \"x\": 1583.442,\n \"y\": 167.552\n },\n {\n \"x\": 1478.869,\n \"y\": 164.903\n },\n {\n \"x\": 1418.941,\n \"y\": 318.149\n },\n {\n \"x\": 1243.128,\n \"y\": 400.815\n },\n {\n \"x\": 1022.067,\n \"y\": 319.007\n },\n {\n \"x\": 892.367,\n \"y\": 379.216\n },\n {\n \"x\": 670.273,\n \"y\": 364.408\n },\n {\n \"x\": 613.114,\n \"y\": 288.16\n },\n {\n \"x\": 377.559,\n \"y\": 238.251\n },\n {\n \"x\": 
368.087,\n \"y\": 185.064\n },\n {\n \"x\": 246.557,\n \"y\": 167.286\n },\n {\n \"x\": 236.648,\n \"y\": 285.61\n },\n {\n \"x\": 90.929,\n \"y\": 326.412\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Annotation\n", + "\n", + "polyline_prediction = lb_types.ObjectAnnotation(\n", + " name=\"polyline\",\n", + " confidence=0.5,\n", + " value=lb_types.Line(points=[\n", + " lb_types.Point(x=2534.353, y=249.471),\n", + " lb_types.Point(x=2429.492, y=182.092),\n", + " lb_types.Point(x=2294.322, y=221.962),\n", + " lb_types.Point(x=2224.491, y=180.463),\n", + " lb_types.Point(x=2136.123, y=204.716),\n", + " lb_types.Point(x=1712.247, y=173.949),\n", + " lb_types.Point(x=1703.838, y=84.438),\n", + " lb_types.Point(x=1579.772, y=82.61),\n", + " lb_types.Point(x=1583.442, y=167.552),\n", + " lb_types.Point(x=1478.869, y=164.903),\n", + " lb_types.Point(x=1418.941, y=318.149),\n", + " lb_types.Point(x=1243.128, y=400.815),\n", + " lb_types.Point(x=1022.067, y=319.007),\n", + " lb_types.Point(x=892.367, y=379.216),\n", + " lb_types.Point(x=670.273, y=364.408),\n", + " lb_types.Point(x=613.114, y=288.16),\n", + " lb_types.Point(x=377.559, y=238.251),\n", + " lb_types.Point(x=368.087, y=185.064),\n", + " lb_types.Point(x=246.557, y=167.286),\n", + " lb_types.Point(x=236.648, y=285.61),\n", + " lb_types.Point(x=90.929, y=326.412),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "polyline_prediction_ndjson = {\n", + " \"name\":\n", + " \"polyline\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [],\n", + " \"line\": [\n", + " {\n", + " \"x\": 2534.353,\n", + " \"y\": 249.471\n", + " },\n", + " {\n", + " \"x\": 2429.492,\n", + " \"y\": 182.092\n", + " },\n", + " {\n", + " \"x\": 2294.322,\n", + " \"y\": 221.962\n", + " },\n", + " {\n", + " \"x\": 2224.491,\n", + " \"y\": 180.463\n", + " },\n", + " {\n", + " \"x\": 2136.123,\n", + " \"y\": 204.716\n", + " 
},\n", + " {\n", + " \"x\": 1712.247,\n", + " \"y\": 173.949\n", + " },\n", + " {\n", + " \"x\": 1703.838,\n", + " \"y\": 84.438\n", + " },\n", + " {\n", + " \"x\": 1579.772,\n", + " \"y\": 82.61\n", + " },\n", + " {\n", + " \"x\": 1583.442,\n", + " \"y\": 167.552\n", + " },\n", + " {\n", + " \"x\": 1478.869,\n", + " \"y\": 164.903\n", + " },\n", + " {\n", + " \"x\": 1418.941,\n", + " \"y\": 318.149\n", + " },\n", + " {\n", + " \"x\": 1243.128,\n", + " \"y\": 400.815\n", + " },\n", + " {\n", + " \"x\": 1022.067,\n", + " \"y\": 319.007\n", + " },\n", + " {\n", + " \"x\": 892.367,\n", + " \"y\": 379.216\n", + " },\n", + " {\n", + " \"x\": 670.273,\n", + " \"y\": 364.408\n", + " },\n", + " {\n", + " \"x\": 613.114,\n", + " \"y\": 288.16\n", + " },\n", + " {\n", + " \"x\": 377.559,\n", + " \"y\": 238.251\n", + " },\n", + " {\n", + " \"x\": 368.087,\n", + " \"y\": 185.064\n", + " },\n", + " {\n", + " \"x\": 246.557,\n", + " \"y\": 167.286\n", + " },\n", + " {\n", + " \"x\": 236.648,\n", + " \"y\": 285.61\n", + " },\n", + " {\n", + " \"x\": 90.929,\n", + " \"y\": 326.412\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# send a sample image as batch to the project\nglobal_key = \"2560px-Kitano_Street_Kobe01s.jpeg\"\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\ndataset = client.create_dataset(name=\"image_prediction_demo\")\ntask = dataset.create_data_rows([test_img_url])\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# send a sample image as batch to the project\n", + "global_key = 
\"2560px-Kitano_Street_Kobe01s.jpeg\" + str(uuid.uuid4())\n", + "test_img_url = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "dataset = client.create_dataset(name=\"image_prediction_demo\")\n", + "task = dataset.create_data_rows([test_img_url])\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows:\", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n 
name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of tools\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"mask\"),\n lb.Tool(\n tool=lb.Tool.Type.RASTER_SEGMENTATION,\n name=\"mask_with_text_subclass\",\n classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"sub_free_text\")\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Image Prediction Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " 
lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\"),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " tools=[ # List of tools\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", + " ),\n", + " ],\n", + " ),\n", + " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n", + " lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"mask\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.RASTER_SEGMENTATION,\n", + " name=\"mask_with_text_subclass\",\n", + " classifications=[\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"sub_free_text\")\n", + " ],\n", + " ),\n", + " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n", + " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Image Prediction 
Import Demo\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Image,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# create Model\nmodel = client.create_model(name=\"image_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# create Model\n", + "model = client.create_model(name=\"image_model_run_\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid)\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", @@ -322,150 +916,398 @@ "Create the prediction payload using the snippets of code in ***Supported Predictions*** section. 
\n", "\n", "The resulting label_ndjson should have exactly the same content for predictions that are supported by both" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Label for predictions\nlabel_prediction = []\nlabel_prediction.append(\n lb_types.Label(\n data=lb_types.ImageData(global_key=global_key),\n annotations=[\n radio_prediction,\n nested_radio_prediction,\n checklist_prediction,\n nested_checklist_prediction,\n bbox_prediction,\n bbox_with_radio_subclass_prediction,\n polyline_prediction,\n polygon_prediction,\n mask_prediction,\n mask_with_text_subclass_prediction,\n point_prediction,\n text_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Label for predictions\n", + "label_prediction = []\n", + "label_prediction.append(\n", + " lb_types.Label(\n", + " data=lb_types.ImageData(global_key=global_key),\n", + " annotations=[\n", + " radio_prediction,\n", + " nested_radio_prediction,\n", + " checklist_prediction,\n", + " nested_checklist_prediction,\n", + " bbox_prediction,\n", + " bbox_with_radio_subclass_prediction,\n", + " polyline_prediction,\n", + " polygon_prediction,\n", + " mask_prediction,\n", + " mask_with_text_subclass_prediction,\n", + " point_prediction,\n", + " text_annotation,\n", + " ],\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON:" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_prediction_ndjson = []\n\nfor annot in [\n radio_prediction_ndjson,\n checklist_prediction_ndjson,\n bbox_prediction_ndjson,\n bbox_with_radio_subclass_prediction_ndjson,\n polygon_prediction_ndjson,\n mask_prediction_ndjson,\n mask_with_text_subclass_prediction_ndjson,\n point_prediction_ndjson,\n polyline_prediction_ndjson,\n text_annotation_ndjson,\n nested_radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n]:\n 
annot.update({\"dataRow\": {\"globalKey\": global_key}})\n label_prediction_ndjson.append(annot)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_prediction_ndjson = []\n", + "\n", + "for annot in [\n", + " radio_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " bbox_prediction_ndjson,\n", + " bbox_with_radio_subclass_prediction_ndjson,\n", + " polygon_prediction_ndjson,\n", + " mask_prediction_ndjson,\n", + " mask_with_text_subclass_prediction_ndjson,\n", + " point_prediction_ndjson,\n", + " polyline_prediction_ndjson,\n", + " text_annotation_ndjson,\n", + " nested_radio_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson,\n", + "]:\n", + " annot.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_prediction_ndjson.append(annot)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. Upload the predictions payload to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for prediction uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", + " predictions=label_prediction,\n", + ")\n", + "\n", + "# Errors will appear for prediction uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)\n", + "print(\"Status of uploads: \", upload_job_prediction.statuses)" + ] }, { + "cell_type": "markdown", 
"metadata": {}, "source": [ "## Step 7: Send annotations to a model run\n", "To visualize both annotations and predictions in the model run we will create a project with ground truth annotations. \n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. Create a labelbox project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"Image Prediction Demo\",\n media_type=lb.MediaType.Image)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Labelbox project\n", + "project = client.create_project(name=\"Image Prediction Demo\",\n", + " media_type=lb.MediaType.Image)\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. 
Create a batch to send to the project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_predictions_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_predictions_demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########### Annotations ###########\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n 
name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n)\n\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon\",\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\nmask_annotation = lb_types.ObjectAnnotation(name=\"mask\",\n 
value=lb_types.Mask(mask=mask_data,\n color=(255, 255,\n 255)))\n\nmask_with_text_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"mask_with_text_subclass\", # must match your ontology feature\"s name\n value=lb_types.Mask(mask=mask_data_2, color=(255, 255, 255)),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_free_text\",\n value=lb_types.Text(answer=\"free text answer\"))\n ],\n)\n\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point\",\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\npolyline_annotation = lb_types.ObjectAnnotation(\n name=\"polyline\",\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########### Annotations ###########\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\")),\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", + " end=lb_types.Point(x=1915,\n", + " y=1307), # x= left + width , y = top + height\n", + " ),\n", + ")\n", + "\n", + "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", + " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\", confidence=0.5)),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "polygon_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polygon\",\n", + " value=lb_types.Polygon(points=[\n", + " lb_types.Point(x=1489.581, y=183.934),\n", + " lb_types.Point(x=2278.306, y=256.885),\n", + " lb_types.Point(x=2428.197, y=200.437),\n", + " lb_types.Point(x=2560.0, y=335.419),\n", + " lb_types.Point(x=2557.386, y=503.165),\n", + " lb_types.Point(x=2320.596, y=503.103),\n", + " lb_types.Point(x=2156.083, y=628.943),\n", + " lb_types.Point(x=2161.111, y=785.519),\n", + " lb_types.Point(x=2002.115, y=894.647),\n", + " lb_types.Point(x=1838.456, y=877.874),\n", + " lb_types.Point(x=1436.53, y=874.636),\n", + " lb_types.Point(x=1411.403, y=758.579),\n", + " lb_types.Point(x=1353.853, y=751.74),\n", + " lb_types.Point(x=1345.264, y=453.461),\n", + " lb_types.Point(x=1426.011, y=421.129),\n", + " ]),\n", + ")\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", + "\n", + "mask_annotation = lb_types.ObjectAnnotation(name=\"mask\",\n", + " value=lb_types.Mask(mask=mask_data,\n", + " color=(255, 255,\n", + " 255)))\n", + "\n", + "mask_with_text_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"mask_with_text_subclass\", # must match your ontology feature\"s name\n", + " value=lb_types.Mask(mask=mask_data_2, color=(255, 255, 255)),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_free_text\",\n", + " value=lb_types.Text(answer=\"free text answer\"))\n", + " ],\n", + ")\n", + "\n", + "point_annotation = lb_types.ObjectAnnotation(\n", + " name=\"point\",\n", + " value=lb_types.Point(x=1166.606, y=1441.768),\n", + ")\n", + "\n", + "polyline_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polyline\",\n", + " value=lb_types.Line(points=[\n", + " 
lb_types.Point(x=2534.353, y=249.471),\n", + " lb_types.Point(x=2429.492, y=182.092),\n", + " lb_types.Point(x=2294.322, y=221.962),\n", + " lb_types.Point(x=2224.491, y=180.463),\n", + " lb_types.Point(x=2136.123, y=204.716),\n", + " lb_types.Point(x=1712.247, y=173.949),\n", + " lb_types.Point(x=1703.838, y=84.438),\n", + " lb_types.Point(x=1579.772, y=82.61),\n", + " lb_types.Point(x=1583.442, y=167.552),\n", + " lb_types.Point(x=1478.869, y=164.903),\n", + " lb_types.Point(x=1418.941, y=318.149),\n", + " lb_types.Point(x=1243.128, y=400.815),\n", + " lb_types.Point(x=1022.067, y=319.007),\n", + " lb_types.Point(x=892.367, y=379.216),\n", + " lb_types.Point(x=670.273, y=364.408),\n", + " lb_types.Point(x=613.114, y=288.16),\n", + " lb_types.Point(x=377.559, y=238.251),\n", + " lb_types.Point(x=368.087, y=185.064),\n", + " lb_types.Point(x=246.557, y=167.286),\n", + " lb_types.Point(x=236.648, y=285.61),\n", + " lb_types.Point(x=90.929, y=326.412),\n", + " ]),\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\nlabel = []\nannotations = [\n radio_annotation,\n nested_radio_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n polygon_annotation,\n mask_annotation,\n mask_with_text_subclass_annotation,\n point_annotation,\n polyline_annotation,\n]\nlabel.append(\n lb_types.Label(data={\"global_key\": global_key}, annotations=annotations))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", + "label = []\n", + "annotations = [\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " text_annotation,\n", + " bbox_annotation,\n", + " bbox_with_radio_subclass_annotation,\n", + " polygon_annotation,\n", + " mask_annotation,\n", + " mask_with_text_subclass_annotation,\n", + " point_annotation,\n", + " polyline_annotation,\n", + "]\n", + "label.append(\n", + " lb_types.Label(data={\"global_key\": global_key}, annotations=annotations))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. 
Upload annotations to the project using Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"annotation_import_\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"annotation_import_\" + str(uuid.uuid4()),\n", + " labels=label,\n", + ")\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6 Send the annotations to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# get the annotations from the project and add them to the model\nmodel_run.upsert_labels(project_id=project.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# get the annotations from the project and add them to the model\n", + "model_run.upsert_labels(project_id=project.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + 
"# dataset.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/prediction_upload/pdf_predictions.ipynb b/examples/prediction_upload/pdf_predictions.ipynb index 82d9f05cb..83c168fcd 100644 --- a/examples/prediction_upload/pdf_predictions.ipynb +++ b/examples/prediction_upload/pdf_predictions.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 2, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,17 +22,17 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Prediction Import " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "*Annotation types*\n", @@ -53,115 +51,419 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nimport json\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "import json\n", + "import requests\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API key" - ], - "cell_type": "markdown" + ] }, { - 
"metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########## Entity ##########\n\n# Annotation Types\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n confidence=0.5,\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_prediction_ndjson = {\n \"name\":\n \"named_entity\",\n \"confidence\":\n 0.5,\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n \"groupId\": \"\",\n \"page\": 1,\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Entity ##########\n", + "\n", + "# Annotation Types\n", + "entities_prediction = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " confidence=0.5,\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "# NDJSON\n", + "entities_prediction_ndjson = {\n", + " \"name\":\n", + " \"named_entity\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\",],\n", + " \"groupId\": \"\",\n", + " \"page\": 1,\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########### Radio Classification #########\n\n# Annotation types\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n)\n# 
NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########### Radio Classification #########\n", + "\n", + "# Annotation types\n", + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\", confidence=0.5)),\n", + ")\n", + "# NDJSON\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Checklist Classification ###########\n", + "\n", + "# Annotation types\n", + "checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5),\n", + 
" ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"checklist_question\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Bounding Box ###########\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim_1[\"left\"],\n y=bbox_dim_1[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n ), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": bbox_dim_1,\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Bounding Box ###########\n", + "\n", + "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", + "bbox_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\", # must match your ontology feature\"s name\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", + " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", + " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " page=0,\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " ),\n", + ")\n", + "\n", + "bbox_prediction_ndjson = {\n", + " \"name\": 
\"bounding_box\",\n", + " \"bbox\": bbox_dim_1,\n", + " \"page\": 0,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "# ############ global nested classifications ###########\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n },\n }],\n }],\n}\n\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": 
{\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# ############ global nested classifications ###########\n", + "\n", + "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=\n", + " 0.5, # Confidence scores should be added to the answer\n", + " )\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " },\n", + " }],\n", + " }],\n", + "}\n", + "\n", + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=\n", + " 0.5, # Confidence scores should be added to the answer\n", + " )),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " }],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############## Classification Free-form text ##############\n\ntext_prediction = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n)\n\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############## Classification Free-form text ##############\n", + "\n", + "text_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", # must match your ontology feature\"s name\n", + " value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n", + ")\n", + "\n", + "text_prediction_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + " \"confidence\": 0.5,\n", + "}" + ] }, { - "metadata": {}, - "source": "######### BBOX with nested classifications #########\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n}\n\nbbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n value=lb_types.DocumentRectangle(\n 
start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\",\n confidence=0.5,\n )),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\",\n \"confidence\": 0.5,\n },\n }],\n },\n }],\n \"bbox\": bbox_dim,\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### BBOX with nested classifications #########\n", + "\n", + "bbox_dim = {\n", + " \"top\": 226.757,\n", + " \"left\": 317.271,\n", + " \"height\": 194.229,\n", + " \"width\": 249.386,\n", + "}\n", + "\n", + "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " confidence=0.5,\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim[\"left\"],\n", + " y=bbox_dim[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n", + " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", + " ), # x= left + width , y = 
top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=0.5,\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"second_sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"second_sub_radio_answer\",\n", + " confidence=0.5,\n", + " )),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "bbox_with_radio_subclass_prediction_ndjson = {\n", + " \"name\": \"bbox_with_radio_subclass\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"second_sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"second_sub_radio_answer\",\n", + " \"confidence\": 0.5,\n", + " },\n", + " }],\n", + " },\n", + " }],\n", + " \"bbox\": bbox_dim,\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "############ NER with nested classifications ########\n\nner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n value=lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n text_selections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n confidence=0.5)\n ]),\n )\n ],\n)\n\nner_with_checklist_subclass_prediction_ndjson = {\n \"name\":\n \"ner_with_checklist_subclass\",\n 
\"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": [{\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5\n }],\n }],\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ NER with nested classifications ########\n", + "\n", + "ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " confidence=0.5,\n", + " value=lb_types.DocumentEntity(\n", + " name=\"ner_with_checklist_subclass\",\n", + " text_selections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n", + " confidence=0.5)\n", + " ]),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "ner_with_checklist_subclass_prediction_ndjson = {\n", + " \"name\":\n", + " \"ner_with_checklist_subclass\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " }],\n", + " }],\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\"],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Passing a `text_layer_url` is not longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. 
\n", @@ -175,60 +477,200 @@ "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. As a result, all tokens in the text layer are also rotated by 90 degrees.\n", "\n", "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "global_key = \"0801.3483.pdf\"\nimg_url = {\n \"row_data\": {\n \"pdf_url\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n },\n \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "global_key = \"0801.3483.pdf\" + str(uuid.uuid4())\n", + "img_url = {\n", + " \"row_data\": {\n", + " \"pdf_url\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", + " },\n", + " \"global_key\": global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", + "task = dataset.create_data_rows([img_url])\n", + "task.wait_till_done()\n", + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: {task.errors}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if (\"Duplicate global key\" in error[\"message\"] and\n", + " dataset.row_count == 0):\n", + " # If the global key already exists in the 
workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: {dataset}\")\n", + " dataset.delete()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n 
options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "## Setup the ontology and link the tools created above.\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", 
+ " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " tools=[ # List of Tool objects\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.NER,\n", + " name=\"ner_with_checklist_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " 
name=\"sub_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_sub_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"second_sub_radio_question\",\n", + " options=[\n", + " lb.Option(\"second_sub_radio_answer\")\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Document Annotation Import Demo\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Document,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# create Model\nmodel = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# create Model\n", + "model = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid)\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the predictions payload\n", @@ -237,184 +679,508 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. 
Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting payload should have exactly the same content for annotations that are supported by both" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "To extract the generated text layer url we first need to export the data row" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "client.enable_experimental = True\ntask = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = json.loads(output.json_str)\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "client.enable_experimental = True\n", + "task = lb.DataRow.export(client=client, global_keys=[global_key])\n", + "task.wait_till_done()\n", + "stream = task.get_stream()\n", + "\n", + "text_layer = \"\"\n", + "for output in stream:\n", + " output_json = json.loads(output.json_str)\n", + " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n", + "print(text_layer)" + ] }, { - "metadata": {}, - "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n \"tokenIds\": list_tokens,\n \"page\": page\n }]\n })\n\n\n# Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n 
\"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. Kobayashi\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n\n# re-write the entity annotation with text selections\nentities_prediction_document_entity = lb_types.DocumentEntity(\n name=\"named_entity\", confidence=0.5, textSelections=text_selections)\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\", value=entities_prediction_document_entity)\n\n# re-write the entity annotation + subclassification with text selections\nclassifications = [\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n 
lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n confidence=0.5)\n ]),\n )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n textSelections=text_selections_ner,\n)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n value=ner_annotation_with_subclass,\n classifications=classifications,\n)\n\n# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\nprint(f\"entities_annotation={entities_prediction}\")\nprint(\n f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Helper method\n", + "def update_text_selections(annotation, group_id, list_tokens, page):\n", + " return annotation.update({\n", + " \"textSelections\": [{\n", + " \"groupId\": group_id,\n", + " \"tokenIds\": list_tokens,\n", + " \"page\": page\n", + " }]\n", + " })\n", + "\n", + "\n", + "# Fetch the content of the text layer\n", + "res = requests.get(text_layer)\n", + "\n", + "# Phrases that we want to annotation obtained from the text layer url\n", + "content_phrases = [\n", + " \"Metal-insulator (MI) transitions have been one of the\",\n", + " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", + "]\n", + "\n", + "# Parse the text layer\n", + "text_selections = []\n", + "text_selections_ner = []\n", + "\n", + "for obj in json.loads(res.text):\n", + " for group in obj[\"groups\"]:\n", + " if group[\"content\"] == content_phrases[0]:\n", + " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " document_text_selection = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", + " text_selections.append(document_text_selection)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=entities_prediction_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " list_tokens, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + " if group[\"content\"] == content_phrases[1]:\n", + " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " ner_text_selection = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", + " text_selections_ner.append(ner_text_selection)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=ner_with_checklist_subclass_prediction_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " list_tokens_2, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + "\n", + "# re-write the entity annotation with text selections\n", + "entities_prediction_document_entity = lb_types.DocumentEntity(\n", + " name=\"named_entity\", confidence=0.5, textSelections=text_selections)\n", + "entities_prediction = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\", value=entities_prediction_document_entity)\n", + "\n", + "# re-write the entity annotation + subclassification 
with text selections\n", + "classifications = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n", + " confidence=0.5)\n", + " ]),\n", + " )\n", + "]\n", + "ner_annotation_with_subclass = lb_types.DocumentEntity(\n", + " name=\"ner_with_checklist_subclass\",\n", + " confidence=0.5,\n", + " textSelections=text_selections_ner,\n", + ")\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " confidence=0.5,\n", + " value=ner_annotation_with_subclass,\n", + " classifications=classifications,\n", + ")\n", + "\n", + "# Final NDJSON and python annotations\n", + "print(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\n", + "print(f\"entities_annotation={entities_prediction}\")\n", + "print(\n", + " f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n", + ")\n", + "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Python annotation \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_predictions = []\n\nlabel_predictions.append(\n lb_types.Label(\n data=lb_types.DocumentData(global_key=global_key),\n annotations=[\n entities_prediction,\n checklist_prediction,\n nested_checklist_prediction,\n text_prediction,\n radio_prediction,\n nested_radio_prediction,\n bbox_prediction,\n bbox_with_radio_subclass_prediction,\n ner_with_checklist_subclass_prediction,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_predictions = []\n", + "\n", + "label_predictions.append(\n", + " lb_types.Label(\n", + " data=lb_types.DocumentData(global_key=global_key),\n", + " annotations=[\n", + " 
entities_prediction,\n", + " checklist_prediction,\n", + " nested_checklist_prediction,\n", + " text_prediction,\n", + " radio_prediction,\n", + " nested_radio_prediction,\n", + " bbox_prediction,\n", + " bbox_with_radio_subclass_prediction,\n", + " ner_with_checklist_subclass_prediction,\n", + " ],\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON: " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_predictions_ndjson = []\nfor annot in [\n entities_prediction_ndjson,\n checklist_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n text_prediction_ndjson,\n radio_prediction_ndjson,\n nested_radio_prediction_ndjson,\n bbox_prediction_ndjson,\n bbox_with_radio_subclass_prediction_ndjson,\n ner_with_checklist_subclass_prediction_ndjson,\n]:\n annot.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_predictions_ndjson.append(annot)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_predictions_ndjson = []\n", + "for annot in [\n", + " entities_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson,\n", + " text_prediction_ndjson,\n", + " radio_prediction_ndjson,\n", + " nested_radio_prediction_ndjson,\n", + " bbox_prediction_ndjson,\n", + " bbox_with_radio_subclass_prediction_ndjson,\n", + " ner_with_checklist_subclass_prediction_ndjson,\n", + "]:\n", + " annot.update({\n", + " \"dataRow\": {\n", + " \"globalKey\": global_key\n", + " },\n", + " })\n", + " label_predictions_ndjson.append(annot)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6: Upload the predictions payload to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n 
predictions=label_predictions,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", + " predictions=label_predictions,\n", + ")\n", + "\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)\n", + "print(\"Status of uploads: \", upload_job_prediction.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run\n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.1 Create a labelbox project \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project = client.create_project(name=\"Document Prediction Import Demo\",\n media_type=lb.MediaType.Document)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project = client.create_project(name=\"Document Prediction Import Demo\",\n", + " media_type=lb.MediaType.Document)\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.2 Create a batch to send to the project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of 
data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "entities_annotation = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(name=\"named_entity\",\n textSelections=text_selections),\n)\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim_1[\"left\"],\n y=bbox_dim_1[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n ), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n 
value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",)\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",)),\n )\n ],\n )),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n}\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n 
text_selections=text_selections_ner),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "entities_annotation = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(name=\"named_entity\",\n", + " textSelections=text_selections),\n", + ")\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + ")\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\", # must match your ontology feature\"s name\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", + " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", + " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " page=0,\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " ),\n", + ")\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", 
+ " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",)\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",)),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", + "\n", + "bbox_dim = {\n", + " \"top\": 226.757,\n", + " \"left\": 317.271,\n", + " \"height\": 194.229,\n", + " \"width\": 249.386,\n", + "}\n", + "\n", + "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim[\"left\"],\n", + " y=bbox_dim[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n", + " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"second_sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " 
name=\"second_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n", + " text_selections=text_selections_ner),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.4 Create the label object " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "labels = []\n\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n radio_annotation,\n nested_radio_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n ner_with_checklist_subclass_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "labels = []\n", + "\n", + "labels.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " entities_annotation,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " text_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " bbox_annotation,\n", + " bbox_with_radio_subclass_annotation,\n", + " ner_with_checklist_subclass_annotation,\n", + " ],\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.5 Upload annotations to the project using Label import\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n 
project_id=project.uid,\n name=\"text_label_import_job\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"text_label_import_job\" + str(uuid.uuid4()),\n", + " labels=labels,\n", + ")\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.6 Send the annotations to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# get the labels id from the project\n", + "model_run.upsert_labels(project_id=project.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Option deletions for cleanup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/prediction_upload/text_predictions.ipynb 
b/examples/prediction_upload/text_predictions.ipynb index 8943ca17b..aba84a546 100644 --- a/examples/prediction_upload/text_predictions.ipynb +++ b/examples/prediction_upload/text_predictions.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Text Prediction Import\n", @@ -50,145 +48,392 @@ "\n", "A Model Run is a container for the predictions, annotations and metrics of a specific experiment in your ML model development cycle.\n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "import uuid" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": 
"markdown", "metadata": {}, "source": [ "## Supported Predictions" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########## Entities ##########\n\n# Python annotation\nnamed_entity = lb_types.TextEntity(start=10, end=20)\nentities_prediction = lb_types.ObjectAnnotation(value=named_entity,\n name=\"named_entity\",\n confidence=0.5)\n\n# NDJSON\nentities_prediction_ndjson = {\n \"name\": \"named_entity\",\n \"confidence\": 0.5,\n \"location\": {\n \"start\": 10,\n \"end\": 20\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Entities ##########\n", + "\n", + "# Python annotation\n", + "named_entity = lb_types.TextEntity(start=10, end=20)\n", + "entities_prediction = lb_types.ObjectAnnotation(value=named_entity,\n", + " name=\"named_entity\",\n", + " confidence=0.5)\n", + "\n", + "# NDJSON\n", + "entities_prediction_ndjson = {\n", + " \"name\": \"named_entity\",\n", + " \"confidence\": 0.5,\n", + " \"location\": {\n", + " \"start\": 10,\n", + " \"end\": 20\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classification - Radio (single choice ) ##########\n\n# Python annotation\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Classification - Radio (single choice ) ##########\n", + "\n", + "# Python annotation\n", + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " 
name=\"first_radio_answer\", confidence=0.5)),\n", + ")\n", + "\n", + "# NDJSON\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classification - Radio and Checklist (with subclassifcations) ##########\n\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n 
\"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5,\n },\n }],\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Classification - Radio and Checklist (with subclassifcations) ##########\n", + "\n", + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\", confidence=0.5)),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " }],\n", + " },\n", + "}\n", + "\n", + "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=0.5,\n", + " 
)\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\": 0.5,\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Checklist ##########\n\n# Python annotation\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\": \"checklist_question\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Checklist ##########\n", + "\n", + "# Python annotation\n", + "checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n", + " confidence=0.5),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_prediction_ndjson = {\n", + " \"name\": \"checklist_question\",\n", + " \"answer\": [{\n", + " 
\"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classification Free-Form text ##########\n\n# Python annotation\ntext_prediction = lb_types.ClassificationAnnotation(name=\"free_text\",\n value=lb_types.Text(\n answer=\"sample text\",\n confidence=0.5))\n\n# NDJSON\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Classification Free-Form text ##########\n", + "\n", + "# Python annotation\n", + "text_prediction = lb_types.ClassificationAnnotation(name=\"free_text\",\n", + " value=lb_types.Text(\n", + " answer=\"sample text\",\n", + " confidence=0.5))\n", + "\n", + "# NDJSON\n", + "text_prediction_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + " \"confidence\": 0.5,\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# send a sample image as batch to the project\nglobal_key = \"lorem-ipsum.txt\"\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt\",\n \"global_key\":\n global_key,\n}\ndataset = client.create_dataset(\n name=\"text prediction demo dataset\",\n iam_integration=\n None, # Removing this argument will default to the organziation's default iam integration\n)\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# send a sample image as batch to the project\n", + "global_key = \"lorem-ipsum.txt\" + 
str(uuid.uuid4())\n", + "test_img_url = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "dataset = client.create_dataset(\n", + " name=\"text prediction demo dataset\",\n", + " iam_integration=\n", + " None, # Removing this argument will default to the organziation's default iam integration\n", + ")\n", + "task = dataset.create_data_rows([test_img_url])\n", + "task.wait_till_done()\n", + "print(\"Errors:\", task.errors)\n", + "print(\"Failed data rows:\", task.failed_data_rows)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[lb.Option(value=\"first_radio_answer\")],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n 
lb.Option(value=\"third_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\")\n ],\n)\n\nontology = client.create_ontology(\n \"Ontology Text Predictions\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Text,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "## Setup the ontology and link the tools created above.\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " options=[lb.Option(value=\"first_radio_answer\")],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", + " ),\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " lb.Option(value=\"third_checklist_answer\"),\n", + " ],\n", + " ),\n", + " 
lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\"),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " tools=[ # List of Tool objects\n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\")\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Ontology Text Predictions\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Text,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# create Model\nmodel = client.create_model(name=\"text_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# create Model\n", + "model = client.create_model(name=\"text_model_run_\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid)\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", 
"metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", @@ -198,149 +443,293 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting label_ndjson should have exactly the same content for annotations that are supported by both" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Label for predictions\nlabel_predictions = []\nlabel_predictions.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_prediction,\n nested_radio_prediction,\n radio_prediction,\n checklist_prediction,\n nested_checklist_prediction,\n text_prediction,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# Create a Label for predictions\n", + "label_predictions = []\n", + "label_predictions.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " entities_prediction,\n", + " nested_radio_prediction,\n", + " radio_prediction,\n", + " checklist_prediction,\n", + " nested_checklist_prediction,\n", + " text_prediction,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON: " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_ndjson_predictions = []\nfor annot in [\n entities_prediction_ndjson,\n radio_prediction_ndjson,\n checklist_prediction_ndjson,\n text_prediction_ndjson,\n nested_radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n]:\n annot.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson_predictions.append(annot)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "label_ndjson_predictions = []\n", + "for annot in [\n", + " 
entities_prediction_ndjson,\n", + " radio_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " text_prediction_ndjson,\n", + " nested_radio_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson,\n", + "]:\n", + " annot.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_ndjson_predictions.append(annot)" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. Upload the predictions payload to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_predictions,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", + " predictions=label_predictions,\n", + ")\n", + "\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)\n", + "print(\"Status of uploads: \", upload_job_prediction.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run \n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. 
Create a labelbox project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"Text Prediction Import Demo\",\n media_type=lb.MediaType.Text)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Labelbox project\n", + "project = client.create_project(name=\"Text Prediction Import Demo\",\n", + " media_type=lb.MediaType.Text)\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. Create a batch to send to the project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "named_entity = lb_types.TextEntity(start=10, end=20)\nentities_annotation = lb_types.ObjectAnnotation(value=named_entity,\n name=\"named_entity\")\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n 
name=\"first_radio_answer\")),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n ]),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "named_entity = lb_types.TextEntity(start=10, end=20)\n", + "entities_annotation = lb_types.ObjectAnnotation(value=named_entity,\n", + " name=\"named_entity\")\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n# Create a Label for predictions\nlabel = []\nlabel.append(\n lb_types.Label(\n data=lb_types.TextData(global_key=global_key),\n annotations=[\n entities_annotation,\n nested_radio_annotation,\n radio_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", + "# Create a Label for predictions\n", + "label = []\n", + "label.append(\n", + " lb_types.Label(\n", + " data=lb_types.TextData(global_key=global_key),\n", + " annotations=[\n", + " entities_annotation,\n", + " nested_radio_annotation,\n", + " radio_annotation,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " text_annotation,\n", + " ],\n", + " ))" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. 
Upload annotations to the project using Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"text_label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"text_label_import_job\" + str(uuid.uuid4()),\n", + " labels=label,\n", + ")\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6 Send the annotations to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# get the labels id from the project\n", + "model_run.upsert_labels(project_id=project.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + 
"language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/prediction_upload/video_predictions.ipynb b/examples/prediction_upload/video_predictions.ipynb index b4a7a5cd4..62ea29567 100644 --- a/examples/prediction_upload/video_predictions.ipynb +++ b/examples/prediction_upload/video_predictions.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Video Prediction Import \n", @@ -49,328 +47,1450 @@ "- Raster segmentation masks [not supported in model]\n", "- Vector segmentation masks [not supported in video editor]\n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "import uuid" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + 
"execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions\n", "- Confidence scores are currently not supported for segment or frame annotations, which are required for bounding box, point, and line for video assets. For this tutorial, only the radio and checklist annotations will have confidence scores." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "####### Bounding box (frame specific) ###########\n\n# Confidence scores are not supported for frame specific bounding box annotations and VideoObjectAnnotation\n\n# bbox dimensions\nbbox_dm = {\"top\": 617, \"left\": 1371, \"height\": 419, \"width\": 505}\n\n# Python Annotation\nbbox_prediction = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"],\n y=bbox_dm[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=15,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=19,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n]\n\n# NDJSON\nbbox_prediction_ndjson = {\n \"name\":\n \"bbox_video\",\n \"segments\": [{\n \"keyframes\": 
[\n {\n \"frame\": 13,\n \"bbox\": bbox_dm\n },\n {\n \"frame\": 15,\n \"bbox\": bbox_dm\n },\n {\n \"frame\": 19,\n \"bbox\": bbox_dm\n },\n ]\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Bounding box (frame specific) ###########\n", + "\n", + "# Confidence scores are not supported for frame specific bounding box annotations and VideoObjectAnnotation\n", + "\n", + "# bbox dimensions\n", + "bbox_dm = {\"top\": 617, \"left\": 1371, \"height\": 419, \"width\": 505}\n", + "\n", + "# Python Annotation\n", + "bbox_prediction = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_video\",\n", + " keyframe=True,\n", + " frame=13,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm[\"left\"],\n", + " y=bbox_dm[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", + " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " ),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_video\",\n", + " keyframe=True,\n", + " frame=15,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", + " end=lb_types.Point(\n", + " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", + " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", + " ),\n", + " ),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_video\",\n", + " keyframe=True,\n", + " frame=19,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", + " end=lb_types.Point(\n", + " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", + " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", + " ),\n", + " ),\n", + " ),\n", + "]\n", + "\n", + "# NDJSON\n", + "bbox_prediction_ndjson = {\n", + " \"name\":\n", + " 
\"bbox_video\",\n", + " \"segments\": [{\n", + " \"keyframes\": [\n", + " {\n", + " \"frame\": 13,\n", + " \"bbox\": bbox_dm\n", + " },\n", + " {\n", + " \"frame\": 15,\n", + " \"bbox\": bbox_dm\n", + " },\n", + " {\n", + " \"frame\": 19,\n", + " \"bbox\": bbox_dm\n", + " },\n", + " ]\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "######## Point ########\n# Confidence score is not supported for VideoObjectAnnotation\n# Python Annotation\npoint_prediction = [\n lb_types.VideoObjectAnnotation(\n name=\"point_video\",\n keyframe=True,\n frame=17,\n value=lb_types.Point(x=660.134, y=407.926),\n )\n]\n\n# NDJSON\npoint_prediction_ndjson = {\n \"name\":\n \"point_video\",\n \"confidence\":\n 0.5,\n \"segments\": [{\n \"keyframes\": [{\n \"frame\": 17,\n \"point\": {\n \"x\": 660.134,\n \"y\": 407.926\n }\n }]\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######## Point ########\n", + "# Confidence score is not supported for VideoObjectAnnotation\n", + "# Python Annotation\n", + "point_prediction = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"point_video\",\n", + " keyframe=True,\n", + " frame=17,\n", + " value=lb_types.Point(x=660.134, y=407.926),\n", + " )\n", + "]\n", + "\n", + "# NDJSON\n", + "point_prediction_ndjson = {\n", + " \"name\":\n", + " \"point_video\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"segments\": [{\n", + " \"keyframes\": [{\n", + " \"frame\": 17,\n", + " \"point\": {\n", + " \"x\": 660.134,\n", + " \"y\": 407.926\n", + " }\n", + " }]\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "######## Polyline (frame specific) ########\n# confidence scores are not supported in polyline annotations\n\n# Python Annotation\npolyline_prediction = [\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=5,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n 
lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=12,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=20,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=24,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=45,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n]\n\n# NDJSON\npolyline_prediction_ndjson = {\n \"name\":\n \"line_video_frame\",\n \"segments\": [\n {\n \"keyframes\": [\n {\n \"frame\":\n 5,\n \"line\": [\n {\n \"x\": 680,\n \"y\": 100\n },\n {\n \"x\": 100,\n \"y\": 190\n },\n {\n \"x\": 190,\n \"y\": 220\n },\n ],\n },\n {\n \"frame\":\n 12,\n \"line\": [\n {\n \"x\": 680,\n \"y\": 280\n },\n {\n \"x\": 300,\n \"y\": 380\n },\n {\n \"x\": 400,\n \"y\": 460\n },\n ],\n },\n {\n \"frame\":\n 20,\n \"line\": [\n {\n \"x\": 680,\n \"y\": 180\n },\n {\n \"x\": 100,\n \"y\": 200\n },\n {\n \"x\": 200,\n \"y\": 260\n },\n ],\n },\n ]\n },\n {\n \"keyframes\": [\n {\n \"frame\": 24,\n \"line\": [{\n \"x\": 300,\n \"y\": 310\n }, {\n \"x\": 330,\n \"y\": 430\n }],\n },\n {\n \"frame\": 45,\n \"line\": [{\n \"x\": 600,\n \"y\": 810\n }, {\n \"x\": 900,\n \"y\": 930\n }],\n },\n ]\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######## Polyline (frame specific) ########\n", + "# confidence scores are not supported in polyline annotations\n", + "\n", + "# 
Python Annotation\n", + "polyline_prediction = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=5,\n", + " segment_index=0,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=12,\n", + " segment_index=0,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=20,\n", + " segment_index=0,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=24,\n", + " segment_index=1,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=45,\n", + " segment_index=1,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + "]\n", + "\n", + "# NDJSON\n", + "polyline_prediction_ndjson = {\n", + " \"name\":\n", + " \"line_video_frame\",\n", + " \"segments\": [\n", + " {\n", + " \"keyframes\": [\n", + " {\n", + " \"frame\":\n", + " 5,\n", + " \"line\": [\n", + " {\n", + " \"x\": 680,\n", + " \"y\": 100\n", + " },\n", + " {\n", + " \"x\": 100,\n", + " \"y\": 190\n", + " },\n", + " {\n", + " \"x\": 190,\n", + " \"y\": 220\n", + " },\n", + " ],\n", + " },\n", + " {\n", + " \"frame\":\n", + " 12,\n", + " \"line\": [\n", + " {\n", + " \"x\": 680,\n", + " \"y\": 280\n", + " },\n", + " {\n", + " \"x\": 300,\n", + " \"y\": 380\n", + 
" },\n", + " {\n", + " \"x\": 400,\n", + " \"y\": 460\n", + " },\n", + " ],\n", + " },\n", + " {\n", + " \"frame\":\n", + " 20,\n", + " \"line\": [\n", + " {\n", + " \"x\": 680,\n", + " \"y\": 180\n", + " },\n", + " {\n", + " \"x\": 100,\n", + " \"y\": 200\n", + " },\n", + " {\n", + " \"x\": 200,\n", + " \"y\": 260\n", + " },\n", + " ],\n", + " },\n", + " ]\n", + " },\n", + " {\n", + " \"keyframes\": [\n", + " {\n", + " \"frame\": 24,\n", + " \"line\": [{\n", + " \"x\": 300,\n", + " \"y\": 310\n", + " }, {\n", + " \"x\": 330,\n", + " \"y\": 430\n", + " }],\n", + " },\n", + " {\n", + " \"frame\": 45,\n", + " \"line\": [{\n", + " \"x\": 600,\n", + " \"y\": 810\n", + " }, {\n", + " \"x\": 900,\n", + " \"y\": 930\n", + " }],\n", + " },\n", + " ]\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "######## Frame base classifications ########\n\n# Python Annotation\nradio_prediction = [\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=9,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=15,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n ),\n]\n\nchecklist_prediction = [\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=29,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5)\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=35,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5)\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=39,\n segment_index=1,\n value=lb_types.Checklist(answer=[\n 
lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5)\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=45,\n segment_index=1,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5)\n ]),\n ),\n]\n\n## NDJSON\nframe_radio_classification_prediction_ndjson = {\n \"name\": \"radio_class\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"frames\": [{\n \"start\": 9,\n \"end\": 15\n }],\n },\n}\n\n## frame specific\nframe_checklist_classification_prediction_ndjson = {\n \"name\":\n \"checklist_class\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"frames\": [{\n \"start\": 29,\n \"end\": 35\n }],\n },\n {\n \"name\": \"second_checklist_answer\",\n \"frames\": [{\n \"start\": 39,\n \"end\": 45\n }],\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######## Frame base classifications ########\n", + "\n", + "# Python Annotation\n", + "radio_prediction = [\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"radio_class\",\n", + " frame=9,\n", + " segment_index=0,\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\", confidence=0.5)),\n", + " ),\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"radio_class\",\n", + " frame=15,\n", + " segment_index=0,\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\", confidence=0.5)),\n", + " ),\n", + "]\n", + "\n", + "checklist_prediction = [\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " frame=29,\n", + " segment_index=0,\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5)\n", + " ]),\n", + " ),\n", + " 
lb_types.VideoClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " frame=35,\n", + " segment_index=0,\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5)\n", + " ]),\n", + " ),\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " frame=39,\n", + " segment_index=1,\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5)\n", + " ]),\n", + " ),\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " frame=45,\n", + " segment_index=1,\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5)\n", + " ]),\n", + " ),\n", + "]\n", + "\n", + "## NDJSON\n", + "frame_radio_classification_prediction_ndjson = {\n", + " \"name\": \"radio_class\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"frames\": [{\n", + " \"start\": 9,\n", + " \"end\": 15\n", + " }],\n", + " },\n", + "}\n", + "\n", + "## frame specific\n", + "frame_checklist_classification_prediction_ndjson = {\n", + " \"name\":\n", + " \"checklist_class\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\",\n", + " \"frames\": [{\n", + " \"start\": 29,\n", + " \"end\": 35\n", + " }],\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\",\n", + " \"frames\": [{\n", + " \"start\": 39,\n", + " \"end\": 45\n", + " }],\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "####### Global Classifications #########\n\n# Python Annotation\n## For global classifications use ClassificationAnnotation\nglobal_radio_prediction = [\n lb_types.ClassificationAnnotation(\n name=\"radio_class_global\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n 
)\n]\n\nglobal_checklist_prediction = [\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_global\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n )\n]\n\n# NDJSON\nglobal_radio_classification_ndjson = {\n \"name\": \"radio_class_global\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}\n\nglobal_checklist_classification_ndjson = {\n \"name\":\n \"checklist_class_global\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "####### Global Classifications #########\n", + "\n", + "# Python Annotation\n", + "## For global classifications use ClassificationAnnotation\n", + "global_radio_prediction = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"radio_class_global\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\", confidence=0.5)),\n", + " )\n", + "]\n", + "\n", + "global_checklist_prediction = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class_global\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5),\n", + " ]),\n", + " )\n", + "]\n", + "\n", + "# NDJSON\n", + "global_radio_classification_ndjson = {\n", + " \"name\": \"radio_class_global\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + "}\n", + "\n", + "global_checklist_classification_ndjson = {\n", + " \"name\":\n", + " 
\"checklist_class_global\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Nested Global Classification ###########\n\n# Python Annotation\nnested_radio_prediction = [\n lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n )),\n )\n]\n\n# NDJSON\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}\n\n# Python Annotation\nnested_checklist_prediction = [\n lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n )\n ]),\n )\n ],\n )\n ]),\n )\n]\n\n# NDJSON\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5,\n },\n }],\n }],\n}", 
"cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Nested Global Classification ###########\n", + "\n", + "# Python Annotation\n", + "nested_radio_prediction = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5,\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\", confidence=0.5)),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + "]\n", + "\n", + "# NDJSON\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " }],\n", + " },\n", + "}\n", + "\n", + "# Python Annotation\n", + "nested_checklist_prediction = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5,\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=0.5,\n", + " )\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + " )\n", + "]\n", + "\n", + "# NDJSON\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + 
" \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\": 0.5,\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########## Classifications under frame base tools ##########\n# Confidence scores are not supported for frame specific bounding box annotations with sub-classifications\n\n# bounding box dimensions\nbbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\n\n# Python Annotation\nframe_bbox_with_checklist_subclass_prediction = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=10,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"],\n y=bbox_dm2[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=11,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5)\n ]),\n )\n ],\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n classifications=[\n 
lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"second_checklist_answer\", confidence=0.5)\n ]),\n )\n ],\n ),\n]\n\nframe_bbox_with_checklist_subclass_prediction_ndjson = {\n \"name\":\n \"bbox_class\",\n \"segments\": [{\n \"keyframes\": [\n {\n \"frame\": 10,\n \"bbox\": bbox_dm2\n },\n {\n \"frame\":\n 11,\n \"bbox\":\n bbox_dm2,\n \"classifications\": [{\n \"name\":\n \"bbox_radio\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5,\n }],\n }],\n },\n {\n \"frame\":\n 13,\n \"bbox\":\n bbox_dm2,\n \"classifications\": [{\n \"name\":\n \"bbox_radio\",\n \"answer\": [{\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5,\n }],\n }],\n },\n ]\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Classifications under frame base tools ##########\n", + "# Confidence scores are not supported for frame specific bounding box annotations with sub-classifications\n", + "\n", + "# bounding box dimensions\n", + "bbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\n", + "\n", + "# Python Annotation\n", + "frame_bbox_with_checklist_subclass_prediction = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_class\",\n", + " keyframe=True,\n", + " frame=10,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm2[\"left\"],\n", + " y=bbox_dm2[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", + " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " ),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_class\",\n", + " keyframe=True,\n", + " frame=11,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " 
start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", + " end=lb_types.Point(\n", + " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", + " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", + " ),\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5)\n", + " ]),\n", + " )\n", + " ],\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_class\",\n", + " keyframe=True,\n", + " frame=13,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", + " end=lb_types.Point(\n", + " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", + " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", + " ),\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"second_checklist_answer\", confidence=0.5)\n", + " ]),\n", + " )\n", + " ],\n", + " ),\n", + "]\n", + "\n", + "frame_bbox_with_checklist_subclass_prediction_ndjson = {\n", + " \"name\":\n", + " \"bbox_class\",\n", + " \"segments\": [{\n", + " \"keyframes\": [\n", + " {\n", + " \"frame\": 10,\n", + " \"bbox\": bbox_dm2\n", + " },\n", + " {\n", + " \"frame\":\n", + " 11,\n", + " \"bbox\":\n", + " bbox_dm2,\n", + " \"classifications\": [{\n", + " \"name\":\n", + " \"bbox_radio\",\n", + " \"answer\": [{\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5,\n", + " }],\n", + " }],\n", + " },\n", + " {\n", + " \"frame\":\n", + " 13,\n", + " \"bbox\":\n", + " bbox_dm2,\n", + " \"classifications\": [{\n", + " \"name\":\n", + " \"bbox_radio\",\n", + " \"answer\": [{\n", + " \"name\": \"second_checklist_answer\",\n", + " \"confidence\": 0.5,\n", + " }],\n", + " }],\n", + " 
},\n", + " ]\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "######### Free text classification ###########\ntext_prediction = [\n lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature's name\n value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n )\n]\n\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"confidence\": 0.5,\n \"answer\": \"sample text\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### Free text classification ###########\n", + "text_prediction = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", # must match your ontology feature's name\n", + " value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n", + " )\n", + "]\n", + "\n", + "text_prediction_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"confidence\": 0.5,\n", + " \"answer\": \"sample text\",\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# send a sample image as batch to the project\nglobal_key = \"sample-video-2.mp4\"\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\",\n \"global_key\":\n global_key,\n}\ndataset = client.create_dataset(\n name=\"Video prediction demo\",\n iam_integration=\n None, # Removing this argument will default to the organziation's default iam integration\n)\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\nprint(\"Errors: \", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# send a sample image as batch to the project\n", + "global_key = \"sample-video-2.mp4\" + str(uuid.uuid4())\n", + "test_img_url 
= {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "dataset = client.create_dataset(\n", + " name=\"Video prediction demo\",\n", + " iam_integration=\n", + " None, # Removing this argument will default to the organziation's default iam integration\n", + ")\n", + "task = dataset.create_data_rows([test_img_url])\n", + "task.wait_till_done()\n", + "print(\"Errors: \", task.errors)\n", + "print(\"Failed data rows: \", task.failed_data_rows)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_video\"),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_video\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_video_frame\"),\n lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"video_mask\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_class\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class\",\n scope=lb.Classification.Scope.\n INDEX, ## defined scope for frame classifications\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n )\n ],\n ),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class\",\n scope=lb.Classification.Scope.\n INDEX, ## defined scope for frame classifications\n options=[\n 
lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_class\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_class_global\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class_global\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Ontology Video Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Video,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " tools=[\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_video\"),\n", + " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_video\"),\n", + " 
lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_video_frame\"),\n", + " lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"video_mask\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_class\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_class\",\n", + " scope=lb.Classification.Scope.\n", + " INDEX, ## defined scope for frame classifications\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_class\",\n", + " scope=lb.Classification.Scope.\n", + " INDEX, ## defined scope for frame classifications\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_class\",\n", + " scope=lb.Classification.Scope.INDEX,\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " 
class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_class_global\",\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_class_global\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\"),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Ontology Video Annotations\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Video,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# create Model\nmodel = client.create_model(name=\"video_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# create Model\n", + "model = client.create_model(name=\"video_model_run_\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid)\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": 
null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", "\n", "Create the annotations payload using the snippets of [code here](https://docs.labelbox.com/reference/import-video-annotations).\n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Python Annotation Types" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_predictions = []\nannotations_list = [\n point_prediction,\n bbox_prediction,\n polyline_prediction,\n checklist_prediction,\n radio_prediction,\n nested_radio_prediction,\n nested_checklist_prediction,\n frame_bbox_with_checklist_subclass_prediction,\n global_radio_prediction,\n global_checklist_prediction,\n text_prediction,\n]\n\nflatten_list_annotations = [\n ann for ann_sublist in annotations_list for ann in ann_sublist\n]\n\nlabel_predictions.append(\n lb_types.Label(data={\"global_key\": global_key},\n annotations=flatten_list_annotations))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_predictions = []\n", + "annotations_list = [\n", + " point_prediction,\n", + " bbox_prediction,\n", + " polyline_prediction,\n", + " checklist_prediction,\n", + " radio_prediction,\n", + " nested_radio_prediction,\n", + " nested_checklist_prediction,\n", + " frame_bbox_with_checklist_subclass_prediction,\n", + " global_radio_prediction,\n", + " global_checklist_prediction,\n", + " text_prediction,\n", + "]\n", + "\n", + "flatten_list_annotations = [\n", + " ann for ann_sublist in annotations_list for ann in ann_sublist\n", + "]\n", + "\n", + "label_predictions.append(\n", + " lb_types.Label(data={\"global_key\": global_key},\n", + " annotations=flatten_list_annotations))" + ] }, { + "cell_type": "markdown", 
"metadata": {}, "source": [ "#### NDJSON annotations" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\nlabel_prediction_ndjson = []\n\nfor annotation in [\n point_prediction_ndjson,\n bbox_prediction_ndjson,\n polyline_prediction_ndjson,\n frame_checklist_classification_prediction_ndjson,\n frame_radio_classification_prediction_ndjson,\n nested_radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n frame_bbox_with_checklist_subclass_prediction_ndjson,\n global_radio_classification_ndjson,\n global_checklist_classification_ndjson,\n text_prediction_ndjson,\n]:\n annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n label_prediction_ndjson.append(annotation)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", + "label_prediction_ndjson = []\n", + "\n", + "for annotation in [\n", + " point_prediction_ndjson,\n", + " bbox_prediction_ndjson,\n", + " polyline_prediction_ndjson,\n", + " frame_checklist_classification_prediction_ndjson,\n", + " frame_radio_classification_prediction_ndjson,\n", + " nested_radio_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson,\n", + " frame_bbox_with_checklist_subclass_prediction_ndjson,\n", + " global_radio_classification_ndjson,\n", + " global_checklist_classification_ndjson,\n", + " text_prediction_ndjson,\n", + "]:\n", + " annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_prediction_ndjson.append(annotation)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. 
Upload the predictions payload to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_predictions,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", + " predictions=label_predictions,\n", + ")\n", + "\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)\n", + "print(\"Status of uploads: \", upload_job_prediction.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run \n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. 
Create a labelbox project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"video_prediction_demo\",\n media_type=lb.MediaType.Video)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Labelbox project\n", + "project = client.create_project(name=\"video_prediction_demo\",\n", + " media_type=lb.MediaType.Video)\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. Create a batch to send to the project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_video_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[global_key\n ], # A list of data rows, data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_video_prediction_demo\", # Each batch in a project must have a unique name\n", + " global_keys=[global_key\n", + " ], # A list of data rows, data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Annotation\npoint_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"point_video\",\n keyframe=True,\n frame=17,\n value=lb_types.Point(x=660.134, y=407.926),\n )\n]\n\n######## Polyline ########\n\n# Python Annotation\npolyline_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=5,\n segment_index=0,\n value=lb_types.Line(\n 
points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=12,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=20,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=24,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=45,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n]\n\nradio_annotation = [\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=9,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=15,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n ),\n]\n\nchecklist_annotation = [\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=29,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=35,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=39,\n segment_index=1,\n value=lb_types.Checklist(answer=[\n 
lb_types.ClassificationAnswer(name=\"second_checklist_answer\")\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=45,\n segment_index=1,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\")\n ]),\n ),\n]\n\nglobal_radio_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"radio_class_global\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n )\n]\n\nglobal_checklist_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_global\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n )\n]\n\nnested_radio_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n )\n]\n\nnested_checklist_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n )\n]\n\nbbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\nframe_bbox_with_checklist_subclass = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=10,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"],\n y=bbox_dm2[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n 
x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=11,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"],\n y=bbox_dm2[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n )\n ],\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"],\n y=bbox_dm2[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"second_checklist_answer\")\n ]),\n )\n ],\n ),\n]\n\nbbox_dm = {\"top\": 617, \"left\": 1371, \"height\": 419, \"width\": 505}\nbbox_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"],\n y=bbox_dm[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=15,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], 
y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=19,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n]\n\ntext_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature's name\n value=lb_types.Text(answer=\"sample text\"),\n )\n]", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Annotation\n", + "point_annotation = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"point_video\",\n", + " keyframe=True,\n", + " frame=17,\n", + " value=lb_types.Point(x=660.134, y=407.926),\n", + " )\n", + "]\n", + "\n", + "######## Polyline ########\n", + "\n", + "# Python Annotation\n", + "polyline_annotation = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=5,\n", + " segment_index=0,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=12,\n", + " segment_index=0,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=20,\n", + " segment_index=0,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " 
keyframe=True,\n", + " frame=24,\n", + " segment_index=1,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"line_video_frame\",\n", + " keyframe=True,\n", + " frame=45,\n", + " segment_index=1,\n", + " value=lb_types.Line(\n", + " points=[lb_types.Point(x=680, y=100),\n", + " lb_types.Point(x=100, y=190)]),\n", + " ),\n", + "]\n", + "\n", + "radio_annotation = [\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"radio_class\",\n", + " frame=9,\n", + " segment_index=0,\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + " ),\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"radio_class\",\n", + " frame=15,\n", + " segment_index=0,\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + " ),\n", + "]\n", + "\n", + "checklist_annotation = [\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " frame=29,\n", + " segment_index=0,\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", + " ]),\n", + " ),\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " frame=35,\n", + " segment_index=0,\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", + " ]),\n", + " ),\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " frame=39,\n", + " segment_index=1,\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\")\n", + " ]),\n", + " ),\n", + " lb_types.VideoClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " frame=45,\n", + " segment_index=1,\n", + " value=lb_types.Checklist(answer=[\n", + " 
lb_types.ClassificationAnswer(name=\"second_checklist_answer\")\n", + " ]),\n", + " ),\n", + "]\n", + "\n", + "global_radio_annotation = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"radio_class_global\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + " )\n", + "]\n", + "\n", + "global_checklist_annotation = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class_global\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + " )\n", + "]\n", + "\n", + "nested_radio_annotation = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + "]\n", + "\n", + "nested_checklist_annotation = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + " )\n", + "]\n", + "\n", + "bbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\n", + "frame_bbox_with_checklist_subclass = [\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_class\",\n", + " keyframe=True,\n", + " 
frame=10,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm2[\"left\"],\n", + " y=bbox_dm2[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", + " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " ),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_class\",\n", + " keyframe=True,\n", + " frame=11,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm2[\"left\"],\n", + " y=bbox_dm2[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", + " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_class\",\n", + " keyframe=True,\n", + " frame=13,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm2[\"left\"],\n", + " y=bbox_dm2[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", + " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"checklist_class\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"second_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " ),\n", + "]\n", + "\n", + "bbox_dm = {\"top\": 617, \"left\": 1371, \"height\": 419, \"width\": 505}\n", + "bbox_annotation = [\n", + " lb_types.VideoObjectAnnotation(\n", + " 
name=\"bbox_video\",\n", + " keyframe=True,\n", + " frame=13,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm[\"left\"],\n", + " y=bbox_dm[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", + " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " ),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_video\",\n", + " keyframe=True,\n", + " frame=15,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", + " end=lb_types.Point(\n", + " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", + " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", + " ),\n", + " ),\n", + " ),\n", + " lb_types.VideoObjectAnnotation(\n", + " name=\"bbox_video\",\n", + " keyframe=True,\n", + " frame=19,\n", + " segment_index=0,\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", + " end=lb_types.Point(\n", + " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", + " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", + " ),\n", + " ),\n", + " ),\n", + "]\n", + "\n", + "text_annotation = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", # must match your ontology feature's name\n", + " value=lb_types.Text(answer=\"sample text\"),\n", + " )\n", + "]" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, - "source": [], - "cell_type": "markdown" + "source": [] }, { - "metadata": {}, - "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n\nlabels = []\nannotations_list = [\n checklist_annotation,\n radio_annotation,\n bbox_annotation,\n frame_bbox_with_checklist_subclass,\n point_annotation,\n polyline_annotation,\n global_checklist_annotation,\n global_radio_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n text_annotation,\n]\n\nflatten_list_annotations = [\n ann for ann_sublist in annotations_list for ann in ann_sublist\n]\n\nlabels.append(\n lb_types.Label(\n data=lb_types.VideoData(global_key=global_key),\n annotations=flatten_list_annotations,\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", + "\n", + "labels = []\n", + "annotations_list = [\n", + " checklist_annotation,\n", + " radio_annotation,\n", + " bbox_annotation,\n", + " frame_bbox_with_checklist_subclass,\n", + " point_annotation,\n", + " polyline_annotation,\n", + " global_checklist_annotation,\n", + " global_radio_annotation,\n", + " nested_checklist_annotation,\n", + " nested_radio_annotation,\n", + " text_annotation,\n", + "]\n", + "\n", + "flatten_list_annotations = [\n", + " ann for ann_sublist in annotations_list for ann in ann_sublist\n", + "]\n", + "\n", + "labels.append(\n", + " lb_types.Label(\n", + " data=lb_types.VideoData(global_key=global_key),\n", + " annotations=flatten_list_annotations,\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. 
Upload annotations to the project using Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"video_annotations_import_\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"video_annotations_import_\" + str(uuid.uuid4()),\n", + " labels=labels,\n", + ")\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6. 
Send the annotations to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# get the labels id from the project\n", + "model_run.upsert_labels(project_id=project.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/project_configuration/webhooks.ipynb b/examples/project_configuration/webhooks.ipynb index 36b6f977b..482abec79 100644 --- a/examples/project_configuration/webhooks.ipynb +++ b/examples/project_configuration/webhooks.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,69 +22,115 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Webhook Configuration" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Webhooks are supported for the following events:\n", "* label_created\n", "* label_updated\n", "* label_deleted" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"\n%pip install -q requests\n%pip install -q hmac\n%pip 
install -q hashlib\n%pip install -q flask\n%pip install -q Werkzeug", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"\n", + "%pip install -q requests\n", + "%pip install -q hmac\n", + "%pip install -q hashlib\n", + "%pip install -q flask\n", + "%pip install -q Werkzeug" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nfrom flask import Flask, request\nimport hmac\nimport hashlib\nimport threading\nfrom werkzeug.serving import run_simple\nimport json\nimport requests\nimport os\nfrom getpass import getpass\nimport socket", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "from flask import Flask, request\n", + "import hmac\n", + "import hashlib\n", + "import threading\n", + "from werkzeug.serving import run_simple\n", + "import json\n", + "import requests\n", + "import os\n", + "from getpass import getpass\n", + "import socket" + ] }, { - "metadata": {}, - "source": "# If you don\"t want to give google access to drive you can skip this cell\n# and manually set `API_KEY` below.\n\nCOLAB = \"google.colab\" in str(get_ipython())\nif COLAB:\n %pip install colab-env -qU\n from colab_env import envvar_handler\n\n envvar_handler.envload()\n\nAPI_KEY = os.environ.get(\"LABELBOX_API_KEY\")\nif not os.environ.get(\"LABELBOX_API_KEY\"):\n API_KEY = getpass(\"Please enter your labelbox api key\")\n if COLAB:\n envvar_handler.add_env(\"LABELBOX_API_KEY\", API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# If you don\"t want to give google access to drive you can skip this cell\n", + "# and manually set `API_KEY` below.\n", + "\n", + "COLAB = \"google.colab\" in str(get_ipython())\n", + "if COLAB:\n", + " %pip install colab-env -qU\n", + " from colab_env import 
envvar_handler\n", + "\n", + " envvar_handler.envload()\n", + "\n", + "API_KEY = os.environ.get(\"LABELBOX_API_KEY\")\n", + "if not os.environ.get(\"LABELBOX_API_KEY\"):\n", + " API_KEY = getpass(\"Please enter your labelbox api key\")\n", + " if COLAB:\n", + " envvar_handler.add_env(\"LABELBOX_API_KEY\", API_KEY)" + ] }, { - "metadata": {}, - "source": "# Set this to a project that you want to use for the webhook\nPROJECT_ID = \"\"\n# Only update this if you have an on-prem deployment\nENDPOINT = \"https://api.labelbox.com/graphql\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set this to a project that you want to use for the webhook\n", + "PROJECT_ID = \"\"\n", + "# Only update this if you have an on-prem deployment\n", + "ENDPOINT = \"https://api.labelbox.com/graphql\"" + ] }, { - "metadata": {}, - "source": "client = lb.Client(api_key=API_KEY, endpoint=ENDPOINT)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "client = lb.Client(api_key=API_KEY, endpoint=ENDPOINT)" + ] }, { - "metadata": {}, - "source": "# We are using port 3001 for this example.\n# Feel free to set to whatever port you want\nWH_PORT = 3001", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# We are using port 3001 for this example.\n", + "# Feel free to set to whatever port you want\n", + "WH_PORT = 3001" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Configure NGROK (Optional)\n", @@ -97,114 +141,211 @@ "2. Download ngrok and extract the zip file\n", "3. Add ngrok to your path\n", "4. 
Add the authtoken `ngrok authtoken `" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "if not COLAB:\n os.system(f\"ngrok http {WH_PORT} &\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "if not COLAB:\n", + " os.system(f\"ngrok http {WH_PORT} &\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Configure server to receive requests" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# This can be any secret that matches your webhook config (we will set later)\nsecret = b\"example_secret\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# This can be any secret that matches your webhook config (we will set later)\n", + "secret = b\"example_secret\"" + ] }, { - "metadata": {}, - "source": "app = Flask(__name__)\n\n\n@app.route(\"/\")\ndef hello_world():\n return \"Hello, World!\"\n\n\n@app.route(\"/webhook-endpoint\", methods=[\"POST\"])\ndef print_webhook_info():\n payload = request.data\n computed_signature = hmac.new(secret, msg=payload,\n digestmod=hashlib.sha1).hexdigest()\n if request.headers[\"X-Hub-Signature\"] != \"sha1=\" + computed_signature:\n print(\n \"Error: computed_signature does not match signature provided in the headers\"\n )\n return \"Error\", 500, 200\n\n print(\"=========== New Webhook Delivery ============\")\n print(\"Delivery ID: %s\" % request.headers[\"X-Labelbox-Id\"])\n print(\"Event: %s\" % request.headers[\"X-Labelbox-Event\"])\n print(\"Payload: %s\" %\n json.dumps(json.loads(payload.decode(\"utf8\")), indent=4))\n return \"Success\"\n\n\nthread = threading.Thread(target=lambda: run_simple(\"0.0.0.0\", WH_PORT, app))\nthread.start()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "app = Flask(__name__)\n", + "\n", + "\n", + 
"@app.route(\"/\")\n", + "def hello_world():\n", + " return \"Hello, World!\"\n", + "\n", + "\n", + "@app.route(\"/webhook-endpoint\", methods=[\"POST\"])\n", + "def print_webhook_info():\n", + " payload = request.data\n", + " computed_signature = hmac.new(secret, msg=payload,\n", + " digestmod=hashlib.sha1).hexdigest()\n", + " if request.headers[\"X-Hub-Signature\"] != \"sha1=\" + computed_signature:\n", + " print(\n", + " \"Error: computed_signature does not match signature provided in the headers\"\n", + " )\n", + " return \"Error\", 500, 200\n", + "\n", + " print(\"=========== New Webhook Delivery ============\")\n", + " print(\"Delivery ID: %s\" % request.headers[\"X-Labelbox-Id\"])\n", + " print(\"Event: %s\" % request.headers[\"X-Labelbox-Event\"])\n", + " print(\"Payload: %s\" %\n", + " json.dumps(json.loads(payload.decode(\"utf8\")), indent=4))\n", + " return \"Success\"\n", + "\n", + "\n", + "thread = threading.Thread(target=lambda: run_simple(\"0.0.0.0\", WH_PORT, app))\n", + "thread.start()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Test server" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "print(requests.get(\"http://localhost:3001\").text)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "print(requests.get(\"http://localhost:3001\").text)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Create Webhook" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "- Set ip address if your ip is publicly accessible.\n", "- Otherwise use the following to get ngrok public_url" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "if not COLAB:\n res = requests.get(\"http://localhost:4040/api/tunnels\")\n assert (res.status_code == 200\n ), f\"ngrok probably isn't running. 
{res.status_code}, {res.text}\"\n tunnels = res.json()[\"tunnels\"]\n tunnel = [\n t for t in tunnels if t[\"config\"][\"addr\"].split(\":\")[-1] == str(WH_PORT)\n ]\n tunnel = tunnel[0] # Should only be one..\n public_url = tunnel[\"public_url\"]\nelse:\n public_url = (\n f\"http://{socket.gethostbyname(socket.getfqdn(socket.gethostname()))}\")\nprint(public_url)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "if not COLAB:\n", + " res = requests.get(\"http://localhost:4040/api/tunnels\")\n", + " assert (res.status_code == 200\n", + " ), f\"ngrok probably isn't running. {res.status_code}, {res.text}\"\n", + " tunnels = res.json()[\"tunnels\"]\n", + " tunnel = [\n", + " t for t in tunnels if t[\"config\"][\"addr\"].split(\":\")[-1] == str(WH_PORT)\n", + " ]\n", + " tunnel = tunnel[0] # Should only be one..\n", + " public_url = tunnel[\"public_url\"]\n", + "else:\n", + " public_url = (\n", + " f\"http://{socket.gethostbyname(socket.getfqdn(socket.gethostname()))}\")\n", + "print(public_url)" + ] }, { - "metadata": {}, - "source": "# Set project to limit the scope to a single project\nproject = client.get_project(PROJECT_ID)\ntopics = {topic.value for topic in lb.Webhook.Topic}\n# For Global Webhooks (Global = per workspace) project = None\nwebhook = lb.Webhook.create(\n client,\n topics=topics,\n url=public_url,\n secret=secret.decode(),\n project=project,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Set project to limit the scope to a single project\n", + "project = client.get_project(PROJECT_ID)\n", + "topics = {topic.value for topic in lb.Webhook.Topic}\n", + "# For Global Webhooks (Global = per workspace) project = None\n", + "webhook = lb.Webhook.create(\n", + " client,\n", + " topics=topics,\n", + " url=public_url,\n", + " secret=secret.decode(),\n", + " project=project,\n", + ")" + ] }, { - 
"metadata": {}, - "source": "# Ok so we should be configured assuming everything is setup correctly.\n# Go to the following url and make a new label to see if it works\nprint(f\"https://app.labelbox.com/projects/{PROJECT_ID}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Ok so we should be configured assuming everything is setup correctly.\n", + "# Go to the following url and make a new label to see if it works\n", + "print(f\"https://app.labelbox.com/projects/{PROJECT_ID}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Update Webhook" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# url, topics, and status can all be updated\nupdated_url = f\"{public_url}/webhook-endpoint\"\nprint(updated_url)\nwebhook.update(url=updated_url)\n# Go to the following url and try one last time.\n# Any supported action should work (create, delete, or update a label)\nprint(f\"https://app.labelbox.com/projects/{PROJECT_ID}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# url, topics, and status can all be updated\n", + "updated_url = f\"{public_url}/webhook-endpoint\"\n", + "print(updated_url)\n", + "webhook.update(url=updated_url)\n", + "# Go to the following url and try one last time.\n", + "# Any supported action should work (create, delete, or update a label)\n", + "print(f\"https://app.labelbox.com/projects/{PROJECT_ID}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### List and delete all webhooks" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# DELETE:\nwebhook.update(status=lb.Webhook.Status.INACTIVE.value)\n\n# FETCH ALL WEBHOOKS:\norg = client.get_organization()\nwebhooks = org.webhooks()\n\n# Run this to clear all.\n# WARNING!!! 
THIS WILL DELETE ALL WEBHOOKS FOR YOUR ORG\n# ONLY RUN THIS IS YOU KNOW WHAT YOU ARE DOING.\n# for webhook in webhooks:\n# print(webhook)\n# webhook.update(status = lb.Webhook.Status.INACTIVE.value)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# DELETE:\n", + "webhook.update(status=lb.Webhook.Status.INACTIVE.value)\n", + "\n", + "# FETCH ALL WEBHOOKS:\n", + "org = client.get_organization()\n", + "webhooks = org.webhooks()\n", + "\n", + "# Run this to clear all.\n", + "# WARNING!!! THIS WILL DELETE ALL WEBHOOKS FOR YOUR ORG\n", + "# ONLY RUN THIS IS YOU KNOW WHAT YOU ARE DOING.\n", + "# for webhook in webhooks:\n", + "# print(webhook)\n", + "# webhook.update(status = lb.Webhook.Status.INACTIVE.value)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 5 +} From fbfd3bd5ee83d5e4466ae57cabc52dc6e84711dd Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 28 May 2024 15:52:43 +0000 Subject: [PATCH 2/2] :art: Cleaned --- examples/annotation_import/audio.ipynb | 356 +--- .../annotation_import/conversational.ipynb | 519 +---- .../conversational_LLM.ipynb | 565 +----- examples/annotation_import/dicom.ipynb | 399 +--- examples/annotation_import/html.ipynb | 476 +---- examples/annotation_import/image.ipynb | 1023 ++-------- examples/annotation_import/pdf.ipynb | 1030 ++-------- examples/annotation_import/text.ipynb | 519 +---- examples/annotation_import/tiled.ipynb | 835 ++------ examples/annotation_import/video.ipynb | 1205 ++--------- examples/basics/data_row_metadata.ipynb | 352 +--- .../custom_metrics_demo.ipynb | 1806 ++--------------- .../model_predictions_to_project.ipynb | 321 +-- .../conversational_LLM_predictions.ipynb | 732 ++----- .../conversational_predictions.ipynb | 690 ++----- .../geospatial_predictions.ipynb | 1041 ++-------- .../prediction_upload/html_predictions.ipynb | 
604 ++---- .../prediction_upload/image_predictions.ipynb | 1156 ++--------- .../prediction_upload/pdf_predictions.ipynb | 1046 ++-------- .../prediction_upload/text_predictions.ipynb | 635 ++---- .../prediction_upload/video_predictions.ipynb | 1376 ++----------- examples/project_configuration/webhooks.ipynb | 289 +-- 22 files changed, 2587 insertions(+), 14388 deletions(-) diff --git a/examples/annotation_import/audio.ipynb b/examples/annotation_import/audio.ipynb index 22d25a130..437130a9e 100644 --- a/examples/annotation_import/audio.ipynb +++ b/examples/annotation_import/audio.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Audio Annotation Import\n", @@ -51,188 +53,111 @@ "* Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", "* Label Import - used to provide ground truth labels. 
These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* For information on what types of annotations are supported per data type, refer to this documentation:\n", " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* Notes:\n", " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport uuid\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import uuid\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## 
Supported annotations for Audio" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "##### Classification free text #####\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"text_audio\",\n value=lb_types.Text(answer=\"free text audio annotation\"),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"text_audio\",\n \"answer\": \"free text audio annotation\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "##### Classification free text #####\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"text_audio\",\n", - " value=lb_types.Text(answer=\"free text audio annotation\"),\n", - ")\n", - "\n", - "text_annotation_ndjson = {\n", - " \"name\": \"text_audio\",\n", - " \"answer\": \"free text audio annotation\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "##### Checklist Classification #######\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_audio\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_audio\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "##### Checklist Classification #######\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_audio\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"checklist_audio\",\n", - " \"answers\": [\n", - " {\n", 
- " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\"\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######## Radio Classification ######\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_audio\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nradio_annotation_ndjson = {\n \"name\": \"radio_audio\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######## Radio Classification ######\n", - "\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_audio\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\")),\n", - ")\n", - "\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"radio_audio\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\"\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create one Labelbox dataset\n\nglobal_key = \"sample-audio-1.mp3\" + str(uuid.uuid4())\n\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"audio_annotation_import_demo_dataset\")\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "# 
Create one Labelbox dataset\n", - "\n", - "global_key = \"sample-audio-1.mp3\" + str(uuid.uuid4())\n", - "\n", - "asset = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"audio_annotation_import_demo_dataset\")\n", - "task = dataset.create_data_rows([asset])\n", - "task.wait_till_done()\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows: \", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an ontology\n", @@ -240,232 +165,135 @@ "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we create the text annotation, we provided the `name` as `text_audio`. Now, when we setup our ontology, we must ensure that the name of the tool is also `text_audio`. The same alignment must hold true for the other tools and classifications we create in our ontology." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(classifications=[\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"text_audio\"),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_audio\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_audio\",\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - "])\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Ontology Audio Annotations\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Audio,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", "## Step 3: Create a labeling project\n", "Connect the ontology to the labeling project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, 
+ "source": "# Create Labelbox project\nproject = client.create_project(name=\"audio_project\",\n media_type=lb.MediaType.Audio)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", + "cell_type": "code", "outputs": [], - "source": [ - "# Create Labelbox project\n", - "project = client.create_project(name=\"audio_project\",\n", - " media_type=lb.MediaType.Audio)\n", - "\n", - "# Setup your ontology\n", - "project.setup_editor(\n", - " ontology) # Connect your ontology and editor to your project" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send a batch of data rows to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Setup Batches and Ontology\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-audio-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", + "cell_type": "code", "outputs": [], - "source": [ - "# Setup Batches and Ontology\n", - "\n", - "# Create a batch to send to your MAL project\n", - "batch = project.create_batch(\n", - " \"first-batch-audio-demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "print(\"Batch: \", batch)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the annotations payload\n", "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: 
NDJSON and Python Annotation types." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label = []\n", - "label.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[text_annotation, checklist_annotation, radio_annotation],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### NDJSON annotations \n", "Here we create the complete label NDJSON payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1)." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\nfor annotations in [\n text_annotation_ndjson,\n checklist_annotation_ndjson,\n radio_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "for annotations in [\n", - " text_annotation_ndjson,\n", - " checklist_annotation_ndjson,\n", - " radio_annotation_ndjson,\n", - "]:\n", - " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson.append(annotations)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or complete labels" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Assisted Labeling (MAL)\n", "For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload our label using Model-Assisted Labeling\n", - "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=f\"mal_job-{str(uuid.uuid4())}\",\n", - " predictions=label,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload label for this data row in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload label for this data row in project\n", - "upload_job = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"label_import_job\" + str(uuid.uuid4()),\n", - " labels=label,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", 
upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/examples/annotation_import/conversational.ipynb b/examples/annotation_import/conversational.ipynb index 1e4c28de1..fd691b9a2 100644 --- a/examples/annotation_import/conversational.ipynb +++ b/examples/annotation_import/conversational.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Conversational Text Annotation Import\n", @@ -53,290 +55,125 @@ "* Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", "* Label Import - used to provide ground truth labels. 
These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* For information on what types of annotations are supported per data type, refer to this documentation:\n", " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* Notes:\n", " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport uuid\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import uuid\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## 
Supported annotations for conversational text" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# message based classifications\nner_annotation = lb_types.ObjectAnnotation(\n name=\"ner\",\n value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n)\n\nner_annotation_ndjson = {\n \"name\": \"ner\",\n \"location\": {\n \"start\": 0,\n \"end\": 8\n },\n \"messageId\": \"4\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# message based classifications\n", - "ner_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner\",\n", - " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n", - ")\n", - "\n", - "ner_annotation_ndjson = {\n", - " \"name\": \"ner\",\n", - " \"location\": {\n", - " \"start\": 0,\n", - " \"end\": 8\n", - " },\n", - " \"messageId\": \"4\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "##### Classification free text #####\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"text_convo\",\n value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n message_id=\"0\",\n)\n\ntext_annotation_ndjson = {\n \"name\": \"text_convo\",\n \"answer\": \"the answer to the text questions right here\",\n \"messageId\": \"0\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "##### Classification free text #####\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"text_convo\",\n", - " value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n", - " message_id=\"0\",\n", - ")\n", - "\n", - "text_annotation_ndjson = {\n", - " \"name\": \"text_convo\",\n", - " \"answer\": \"the answer to the text questions right here\",\n", - " \"messageId\": \"0\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "##### Checklist 
Classification #######\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n message_id=\"2\",\n)\n\nchecklist_annotation_ndjson = {\n \"name\": \"checklist_convo\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n \"messageId\": \"2\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "##### Checklist Classification #######\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_convo\", # must match your ontology feature\"s name\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - " message_id=\"2\",\n", - ")\n", - "\n", - "checklist_annotation_ndjson = {\n", - " \"name\": \"checklist_convo\",\n", - " \"answers\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\"\n", - " },\n", - " ],\n", - " \"messageId\": \"2\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######## Radio Classification ######\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_convo\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n message_id=\"0\",\n)\n\nradio_annotation_ndjson = {\n \"name\": \"radio_convo\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n \"messageId\": \"0\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######## Radio Classification ######\n", - "\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", 
- " name=\"radio_convo\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - " message_id=\"0\",\n", - ")\n", - "\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"radio_convo\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\"\n", - " },\n", - " \"messageId\": \"0\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# ############ global nested classifications ###########\n# Message based\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"10\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",)\n ]),\n )\n ],\n )\n ]),\n)\n# Message based\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"messageId\":\n \"10\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}\n# Global\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\n# Global\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n 
},\n }],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# ############ global nested classifications ###########\n", - "# Message based\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " message_id=\"10\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",)\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "# Message based\n", - "nested_checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"messageId\":\n", - " \"10\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " },\n", - " }],\n", - " }],\n", - "}\n", - "# Global\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "# Global\n", - "nested_radio_annotation_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\"\n", - " },\n", - " }],\n", - " 
},\n", - "}" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create one Labelbox dataset\n\nglobal_key = \"conversation-1.json\" + str(uuid.uuid4())\n\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(\n name=\"conversational_annotation_import_demo_dataset\")\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create one Labelbox dataset\n", - "\n", - "global_key = \"conversation-1.json\" + str(uuid.uuid4())\n", - "\n", - "asset = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(\n", - " name=\"conversational_annotation_import_demo_dataset\")\n", - "task = dataset.create_data_rows([asset])\n", - "task.wait_till_done()\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows: \", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an ontology\n", @@ -344,283 +181,135 @@ "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` 
fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we create the text annotation, we provided the `name` as `text_convo`. Now, when we setup our ontology, we must ensure that the name of the tool is also `text_convo`. The same alignment must hold true for the other tools and classifications we create in our ontology." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n scope=lb.Classification.Scope.INDEX,\n name=\"text_convo\",\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n scope=lb.Classification.Scope.INDEX,\n name=\"checklist_convo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_convo\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n 
],\n)\n\nontology = client.create_ontology(\"Ontology Conversation Annotations\",\n ontology_builder.asdict())", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(\n", - " tools=[\n", - " lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n", - " ],\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.TEXT,\n", - " scope=lb.Classification.Scope.INDEX,\n", - " name=\"text_convo\",\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " scope=lb.Classification.Scope.INDEX,\n", - " name=\"checklist_convo\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_convo\",\n", - " scope=lb.Classification.Scope.INDEX,\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " scope=lb.Classification.Scope.INDEX,\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - 
" ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\"Ontology Conversation Annotations\",\n", - " ontology_builder.asdict())" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", "## Step 3: Create a labeling project\n", "Connect the ontology to the labeling project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create Labelbox project\nproject = client.create_project(\n name=\"Conversational Text Annotation Import Demo\",\n media_type=lb.MediaType.Conversational,\n)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", + "cell_type": "code", "outputs": [], - "source": [ - "# Create Labelbox project\n", - "project = client.create_project(\n", - " name=\"Conversational Text Annotation Import Demo\",\n", - " media_type=lb.MediaType.Conversational,\n", - ")\n", - "\n", - "# Setup your ontology\n", - "project.setup_editor(\n", - " ontology) # Connect your ontology and editor to your project" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send a batch of data rows to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a batch to send to your MAL project\n", - "batch = project.create_batch(\n", - " \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n", - " 
global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "print(\"Batch: \", batch)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the annotations payload\n", "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. However,for conversational texts NDJSON is the only supported format. " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n ner_annotation,\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_radio_annotation,\n nested_checklist_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label = []\n", - "label.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " ner_annotation,\n", - " text_annotation,\n", - " checklist_annotation,\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " nested_checklist_annotation,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "### NDJSON annotations \n", "Here we create the complete label NDJSON payload of annotations only using NDJSON format. 
There is one annotation for each reference to an annotation that we created [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1)." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\nfor annotations in [\n ner_annotation_ndjson,\n text_annotation_ndjson,\n checklist_annotation_ndjson,\n radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n nested_radio_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "for annotations in [\n", - " ner_annotation_ndjson,\n", - " text_annotation_ndjson,\n", - " checklist_annotation_ndjson,\n", - " radio_annotation_ndjson,\n", - " nested_checklist_annotation_ndjson,\n", - " nested_radio_annotation_ndjson,\n", - "]:\n", - " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson.append(annotations)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or complete labels" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Assisted Labeling (MAL)\n", "For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload our label using Model-Assisted Labeling\n", - "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=f\"mal_job-{str(uuid.uuid4())}\",\n", - " predictions=label,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload label for this data row in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload label for this data row in project\n", - "upload_job = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"label_import_job\" + str(uuid.uuid4()),\n", - " labels=label,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", 
upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/examples/annotation_import/conversational_LLM.ipynb b/examples/annotation_import/conversational_LLM.ipynb index 733607bde..a1870990e 100644 --- a/examples/annotation_import/conversational_LLM.ipynb +++ b/examples/annotation_import/conversational_LLM.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,270 +24,137 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# LLM pairwise comparison with Conversational text using MAL and Ground truth\n", "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Set up" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport labelbox.types 
as lb_types\nimport uuid", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "import uuid" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API key" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Supported annotations for conversational text" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Entity " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ner_annotation = lb_types.ObjectAnnotation(\n name=\"ner\",\n value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n)\n\nner_annotation_ndjson = {\n \"name\": \"ner\",\n \"location\": {\n \"start\": 0,\n \"end\": 8\n },\n \"messageId\": \"message-1\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "ner_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner\",\n", - " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n", - ")\n", - "\n", - "ner_annotation_ndjson = {\n", - " \"name\": \"ner\",\n", - " \"location\": {\n", - " \"start\": 0,\n", - " \"end\": 8\n", - " },\n", - " \"messageId\": \"message-1\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Radio (single-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "radio_annotation = lb_types.ClassificationAnnotation(\n name=\"Choose the best response\",\n 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"Response B\")),\n)\n\nradio_annotation_ndjson = {\n \"name\": \"Choose the best response\",\n \"answer\": {\n \"name\": \"Response B\"\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"Choose the best response\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"Response B\")),\n", - ")\n", - "\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"Choose the best response\",\n", - " \"answer\": {\n", - " \"name\": \"Response B\"\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Free-form text" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "text_annotation = lb_types.ClassificationAnnotation(\n name=\"Provide a reason for your choice\",\n value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"Provide a reason for your choice\",\n \"answer\": \"This is the more concise answer\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"Provide a reason for your choice\",\n", - " value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n", - ")\n", - "\n", - "text_annotation_ndjson = {\n", - " \"name\": \"Provide a reason for your choice\",\n", - " \"answer\": \"This is the more concise answer\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Checklist (multi-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s 
name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n message_id=\"message-1\", # Message specific annotation\n)\n\nchecklist_annotation_ndjson = {\n \"name\": \"checklist_convo\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n \"messageId\": \"message-1\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_convo\", # must match your ontology feature\"s name\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - " message_id=\"message-1\", # Message specific annotation\n", - ")\n", - "\n", - "checklist_annotation_ndjson = {\n", - " \"name\": \"checklist_convo\",\n", - " \"answers\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\"\n", - " },\n", - " ],\n", - " \"messageId\": \"message-1\",\n", - "}" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "### Classification: Nested radio and checklist" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Message based\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"message-1\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n# Message 
based\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"messageId\":\n \"message-1\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n },\n }],\n }],\n}\n# Global\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n# Global\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Message based\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " message_id=\"message-1\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "# Message based\n", - "nested_checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"messageId\":\n", - " \"message-1\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": 
{\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " },\n", - " }],\n", - " }],\n", - "}\n", - "# Global\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "# Global\n", - "nested_radio_annotation_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - "}" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Step 1: Import data rows with \"modelOutputs\" into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n", @@ -299,351 +168,141 @@ " }\n", "]\n", "```\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Example of row_data with model outputs" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "pairwise_shopping_2 = \"\"\"\n {\n \"type\": \"application/vnd.labelbox.conversational\",\n \"version\": 1,\n \"messages\": [\n {\n \"messageId\": \"message-0\",\n \"timestampUsec\": 1530718491,\n \"content\": \"Hi! 
How can I help?\",\n \"user\": {\n \"userId\": \"Bot 002\",\n \"name\": \"Bot\"\n },\n \"align\": \"left\",\n \"canLabel\": false\n },\n {\n \"messageId\": \"message-1\",\n \"timestampUsec\": 1530718503,\n \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n \"user\": {\n \"userId\": \"User 00686\",\n \"name\": \"User\"\n },\n \"align\": \"right\",\n \"canLabel\": true\n }\n\n ],\n \"modelOutputs\": [\n {\n \"title\": \"Response A\",\n \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n },\n {\n \"title\": \"Response B\",\n \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n }\n ]\n}\n\"\"\"", + "cell_type": "code", "outputs": [], - "source": [ - "pairwise_shopping_2 = \"\"\"\n", - " {\n", - " \"type\": \"application/vnd.labelbox.conversational\",\n", - " \"version\": 1,\n", - " \"messages\": [\n", - " {\n", - " \"messageId\": \"message-0\",\n", - " \"timestampUsec\": 1530718491,\n", - " \"content\": \"Hi! 
How can I help?\",\n", - " \"user\": {\n", - " \"userId\": \"Bot 002\",\n", - " \"name\": \"Bot\"\n", - " },\n", - " \"align\": \"left\",\n", - " \"canLabel\": false\n", - " },\n", - " {\n", - " \"messageId\": \"message-1\",\n", - " \"timestampUsec\": 1530718503,\n", - " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n", - " \"user\": {\n", - " \"userId\": \"User 00686\",\n", - " \"name\": \"User\"\n", - " },\n", - " \"align\": \"right\",\n", - " \"canLabel\": true\n", - " }\n", - "\n", - " ],\n", - " \"modelOutputs\": [\n", - " {\n", - " \"title\": \"Response A\",\n", - " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n", - " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n", - " },\n", - " {\n", - " \"title\": \"Response B\",\n", - " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. 
Your satisfaction is important to us!\",\n", - " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n", - " }\n", - " ]\n", - "}\n", - "\"\"\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "global_key = \"pairwise_shooping_asset\" + str(uuid.uuid4())\n\n# Upload data rows\nconvo_data = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n \"global_key\":\n global_key,\n}\n\n# Create a dataset\ndataset = client.create_dataset(name=\"pairwise_annotation_demo\")\n# Create a datarows\ntask = dataset.create_data_rows([convo_data])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "global_key = \"pairwise_shooping_asset\" + str(uuid.uuid4())\n", - "\n", - "# Upload data rows\n", - "convo_data = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "# Create a dataset\n", - "dataset = client.create_dataset(name=\"pairwise_annotation_demo\")\n", - "# Create a datarows\n", - "task = dataset.create_data_rows([convo_data])\n", - "task.wait_till_done()\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create an ontology with relevant classifications\n\nontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n scope=lb.Classification.Scope.GLOBAL,\n 
name=\"Choose the best response\",\n options=[\n lb.Option(value=\"Response A\"),\n lb.Option(value=\"Response B\"),\n lb.Option(value=\"Tie\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"Provide a reason for your choice\",\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n scope=lb.Classification.Scope.INDEX,\n name=\"checklist_convo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Pairwise comparison ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Conversational,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create an ontology with relevant classifications\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " tools=[\n", - " lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n", - " ],\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " name=\"Choose the best response\",\n", - " options=[\n", - " lb.Option(value=\"Response A\"),\n", - " lb.Option(value=\"Response B\"),\n", - " 
lb.Option(value=\"Tie\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.TEXT,\n", - " name=\"Provide a reason for your choice\",\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " scope=lb.Classification.Scope.INDEX,\n", - " name=\"checklist_convo\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " scope=lb.Classification.Scope.INDEX,\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Pairwise comparison ontology\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Conversational,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a labeling project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create Labelbox project\nproject = client.create_project(\n name=\"Conversational Text 
Annotation Import Demo (Pairwise comparison)\",\n media_type=lb.MediaType.Conversational,\n)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", + "cell_type": "code", "outputs": [], - "source": [ - "# Create Labelbox project\n", - "project = client.create_project(\n", - " name=\"Conversational Text Annotation Import Demo (Pairwise comparison)\",\n", - " media_type=lb.MediaType.Conversational,\n", - ")\n", - "\n", - "# Setup your ontology\n", - "project.setup_editor(\n", - " ontology) # Connect your ontology and editor to your project" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send a batch of data rows to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a batch to send to your project\nbatch = project.create_batch(\n \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a batch to send to your project\n", - "batch = project.create_batch(\n", - " \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "print(\"Batch: \", batch)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Python annotation" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - 
"execution_count": null, "metadata": {}, + "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n ner_annotation,\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_radio_annotation,\n nested_checklist_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label = []\n", - "label.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " ner_annotation,\n", - " text_annotation,\n", - " checklist_annotation,\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " nested_checklist_annotation,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "NDJSON annotation" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\nfor annotations in [\n ner_annotation_ndjson,\n text_annotation_ndjson,\n checklist_annotation_ndjson,\n radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n nested_radio_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "for annotations in [\n", - " ner_annotation_ndjson,\n", - " text_annotation_ndjson,\n", - " checklist_annotation_ndjson,\n", - " radio_annotation_ndjson,\n", - " nested_checklist_annotation_ndjson,\n", - " nested_radio_annotation_ndjson,\n", - "]:\n", - " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson.append(annotations)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Step 6: Upload annotations to a project as pre-labels or complete labels " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Model Assisted Labeling (MAL)" 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=f\"mal_job-{str(uuid.uuid4())}\",\n", - " predictions=label,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"label_import_job\" + str(uuid.uuid4()),\n", - " labels=label,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/examples/annotation_import/dicom.ipynb b/examples/annotation_import/dicom.ipynb index 
42a373e18..3f6aa0326 100644 --- a/examples/annotation_import/dicom.ipynb +++ b/examples/annotation_import/dicom.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# DICOM Annotation Import\n", @@ -41,237 +43,88 @@ " * Free form text classifications\n", " * Radio classifications \n", " * Checklist classifications" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "import uuid" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = None\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = None\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported 
annotations for DICOM\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######## Polyline ########\npolyline_annotation = [\n lb_types.DICOMObjectAnnotation(\n name=\"line_dicom\",\n group_key=lb_types.GroupKey.AXIAL,\n frame=1,\n value=lb_types.Line(points=[\n lb_types.Point(x=10, y=10),\n lb_types.Point(x=200, y=20),\n lb_types.Point(x=250, y=250),\n ]),\n segment_index=0,\n keyframe=True,\n ),\n lb_types.DICOMObjectAnnotation(\n name=\"line_dicom\",\n group_key=lb_types.GroupKey.AXIAL,\n frame=20,\n value=lb_types.Line(points=[\n lb_types.Point(x=10, y=10),\n lb_types.Point(x=200, y=10),\n lb_types.Point(x=300, y=300),\n ]),\n segment_index=1,\n keyframe=True,\n ),\n]\n\npolyline_annotation_ndjson = {\n \"name\":\n \"line_dicom\",\n \"groupKey\":\n \"axial\", # should be 'axial', 'sagittal', or 'coronal'\n \"segments\": [\n {\n \"keyframes\": [{\n \"frame\":\n 1,\n \"line\": [\n {\n \"x\": 10,\n \"y\": 10\n },\n {\n \"x\": 200,\n \"y\": 20\n },\n {\n \"x\": 250,\n \"y\": 250\n },\n ],\n }]\n },\n {\n \"keyframes\": [{\n \"frame\":\n 20,\n \"line\": [\n {\n \"x\": 10,\n \"y\": 10\n },\n {\n \"x\": 200,\n \"y\": 10\n },\n {\n \"x\": 300,\n \"y\": 300\n },\n ],\n }]\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######## Polyline ########\n", - "polyline_annotation = [\n", - " lb_types.DICOMObjectAnnotation(\n", - " name=\"line_dicom\",\n", - " group_key=lb_types.GroupKey.AXIAL,\n", - " frame=1,\n", - " value=lb_types.Line(points=[\n", - " lb_types.Point(x=10, y=10),\n", - " lb_types.Point(x=200, y=20),\n", - " lb_types.Point(x=250, y=250),\n", - " ]),\n", - " segment_index=0,\n", - " keyframe=True,\n", - " ),\n", - " lb_types.DICOMObjectAnnotation(\n", - " name=\"line_dicom\",\n", - " group_key=lb_types.GroupKey.AXIAL,\n", - " frame=20,\n", - " value=lb_types.Line(points=[\n", - " lb_types.Point(x=10, y=10),\n", - " lb_types.Point(x=200, y=10),\n", - " 
lb_types.Point(x=300, y=300),\n", - " ]),\n", - " segment_index=1,\n", - " keyframe=True,\n", - " ),\n", - "]\n", - "\n", - "polyline_annotation_ndjson = {\n", - " \"name\":\n", - " \"line_dicom\",\n", - " \"groupKey\":\n", - " \"axial\", # should be 'axial', 'sagittal', or 'coronal'\n", - " \"segments\": [\n", - " {\n", - " \"keyframes\": [{\n", - " \"frame\":\n", - " 1,\n", - " \"line\": [\n", - " {\n", - " \"x\": 10,\n", - " \"y\": 10\n", - " },\n", - " {\n", - " \"x\": 200,\n", - " \"y\": 20\n", - " },\n", - " {\n", - " \"x\": 250,\n", - " \"y\": 250\n", - " },\n", - " ],\n", - " }]\n", - " },\n", - " {\n", - " \"keyframes\": [{\n", - " \"frame\":\n", - " 20,\n", - " \"line\": [\n", - " {\n", - " \"x\": 10,\n", - " \"y\": 10\n", - " },\n", - " {\n", - " \"x\": 200,\n", - " \"y\": 10\n", - " },\n", - " {\n", - " \"x\": 300,\n", - " \"y\": 300\n", - " },\n", - " ],\n", - " }]\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######## Segmentation Masks ########\n\nmask_annotation = [\n lb_types.DICOMMaskAnnotation(\n group_key=\"axial\",\n frames=[\n lb_types.MaskFrame(\n index=1,\n instance_uri=\n \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n ),\n lb_types.MaskFrame(\n index=5,\n instance_uri=\n \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n ),\n ],\n instances=[\n lb_types.MaskInstance(color_rgb=(255, 255, 255),\n name=\"segmentation_mask_dicom\")\n ],\n )\n]\n\nmask_annotation_ndjson = {\n \"groupKey\": \"axial\",\n \"masks\": {\n \"frames\": [\n {\n \"index\":\n 1,\n \"instanceURI\":\n \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n },\n {\n \"index\":\n 5,\n \"instanceURI\":\n \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n },\n ],\n \"instances\": [{\n \"colorRGB\": (255, 255, 255),\n 
\"name\": \"segmentation_mask_dicom\"\n }],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######## Segmentation Masks ########\n", - "\n", - "mask_annotation = [\n", - " lb_types.DICOMMaskAnnotation(\n", - " group_key=\"axial\",\n", - " frames=[\n", - " lb_types.MaskFrame(\n", - " index=1,\n", - " instance_uri=\n", - " \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n", - " ),\n", - " lb_types.MaskFrame(\n", - " index=5,\n", - " instance_uri=\n", - " \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n", - " ),\n", - " ],\n", - " instances=[\n", - " lb_types.MaskInstance(color_rgb=(255, 255, 255),\n", - " name=\"segmentation_mask_dicom\")\n", - " ],\n", - " )\n", - "]\n", - "\n", - "mask_annotation_ndjson = {\n", - " \"groupKey\": \"axial\",\n", - " \"masks\": {\n", - " \"frames\": [\n", - " {\n", - " \"index\":\n", - " 1,\n", - " \"instanceURI\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n", - " },\n", - " {\n", - " \"index\":\n", - " 5,\n", - " \"instanceURI\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-mask-1.png\",\n", - " },\n", - " ],\n", - " \"instances\": [{\n", - " \"colorRGB\": (255, 255, 255),\n", - " \"name\": \"segmentation_mask_dicom\"\n", - " }],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "global_key = \"sample-dicom-1.dcm\" + str(uuid.uuid4())\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-dicom-1.dcm\",\n \"global_key\":\n 
global_key,\n}\n\ndataset = client.create_dataset(name=\"dicom_demo_dataset\")\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors :\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "global_key = \"sample-dicom-1.dcm\" + str(uuid.uuid4())\n", - "asset = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/dicom-sample-data/sample-dicom-1.dcm\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"dicom_demo_dataset\")\n", - "task = dataset.create_data_rows([asset])\n", - "task.wait_till_done()\n", - "print(\"Errors :\", task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an ontology\n", @@ -281,215 +134,133 @@ "\n", "\n", "[Documentation for reference ](https://docs.labelbox.com/reference/import-text-annotations)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(tools=[\n lb.Tool(\n tool=lb.Tool.Type.RASTER_SEGMENTATION,\n name=\"segmentation_mask_dicom\",\n ),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_dicom\"),\n])\n\nontology = client.create_ontology(\n \"Ontology DICOM Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Dicom,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(tools=[\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.RASTER_SEGMENTATION,\n", - " name=\"segmentation_mask_dicom\",\n", - " ),\n", - " lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_dicom\"),\n", - "])\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Ontology DICOM Annotations\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Dicom,\n", - ")" - ] + "execution_count": 
null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Create a labeling project \n", "Connect the ontology to the labeling project." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"dicom_project_demo\",\n media_type=lb.MediaType.Dicom)\n\n## connect ontology to your project\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n", - "# Queue mode will be deprecated once dataset mode is deprecated\n", - "\n", - "project = client.create_project(name=\"dicom_project_demo\",\n", - " media_type=lb.MediaType.Dicom)\n", - "\n", - "## connect ontology to your project\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create batches\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-dicom-demo\", # Each batch in a project must have a unique name\n global_keys=[global_key\n ], # a list of data row objects, data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create batches\n", - "\n", - "# Create a batch to send to your MAL project\n", - "batch = project.create_batch(\n", - " \"first-batch-dicom-demo\", # Each batch in a project must have a unique name\n", - " global_keys=[global_key\n", - " ], # a list of data row objects, data row ids 
or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "print(\"Batch: \", batch)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5: Create the annotations payload \n", "Create the annotations payload using the snippets of code above.\n", "\n", "Labelbox supports two formats for the annotations payload: NDJSON and Python Annotation types." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Python Annotation Types" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "annotations_list = polyline_annotation + mask_annotation\nlabels = [\n lb_types.Label(data={\"global_key\": global_key},\n annotations=annotations_list)\n]", + "cell_type": "code", "outputs": [], - "source": [ - "annotations_list = polyline_annotation + mask_annotation\n", - "labels = [\n", - " lb_types.Label(data={\"global_key\": global_key},\n", - " annotations=annotations_list)\n", - "]" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### NDJSON annotations\n", "Here we create the complete `label_ndjson` payload of annotations. There is one annotation for each *reference to an annotation* that we created above." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\n\nfor annotation in [polyline_annotation_ndjson, mask_annotation_ndjson]:\n annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotation)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "\n", - "for annotation in [polyline_annotation_ndjson, mask_annotation_ndjson]:\n", - " annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson.append(annotation)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or completed labels\n", "For the purpose of this tutorial only run one of the label imports at once, otherwise the previous import might get overwritten." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Model-Assisted Labeling (MAL)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload MAL label for this data row in project\nupload_job_mal = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"mal_import_job-\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job_mal.wait_until_done()\nprint(\"Errors:\", upload_job_mal.errors)\nprint(\"Status of uploads: \", upload_job_mal.statuses)\nprint(\" \")", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload MAL label for this data row in project\n", - "upload_job_mal = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"mal_import_job-\" + str(uuid.uuid4()),\n", - " predictions=labels,\n", - ")\n", - "\n", - "upload_job_mal.wait_until_done()\n", - "print(\"Errors:\", upload_job_mal.errors)\n", - "print(\"Status of uploads: \", upload_job_mal.statuses)\n", - 
"print(\" \")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_label_import = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job-\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_label_import.wait_until_done()\nprint(\"Errors:\", upload_job_label_import.errors)\nprint(\"Status of uploads: \", upload_job_label_import.statuses)\nprint(\" \")", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_label_import = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"label_import_job-\" + str(uuid.uuid4()),\n", - " labels=labels,\n", - ")\n", - "\n", - "upload_job_label_import.wait_until_done()\n", - "print(\"Errors:\", upload_job_label_import.errors)\n", - "print(\"Status of uploads: \", upload_job_label_import.statuses)\n", - "print(\" \")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Delete Project\n# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# Delete Project\n", - "# project.delete()\n", - "# dataset.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/examples/annotation_import/html.ipynb b/examples/annotation_import/html.ipynb index 7a1de2556..567482878 100644 --- a/examples/annotation_import/html.ipynb +++ b/examples/annotation_import/html.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": 
"markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# HTML Annotation Import\n", @@ -51,262 +53,118 @@ "* Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", "* Label Import - used to provide ground truth labels. These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* For information on what types of annotations are supported per data type, refer to this documentation:\n", " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* Notes:\n", " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport uuid\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import uuid\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for HTML" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "##### Classification free text #####\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"text_html\",\n value=lb_types.Text(answer=\"sample text\"),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"text_html\",\n \"answer\": \"sample text\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "##### Classification free text #####\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"text_html\",\n", - " value=lb_types.Text(answer=\"sample text\"),\n", - ")\n", - "\n", - 
"text_annotation_ndjson = {\n", - " \"name\": \"text_html\",\n", - " \"answer\": \"sample text\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "##### Checklist Classification #######\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_html\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_html\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "##### Checklist Classification #######\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_html\", # must match your ontology feature\"s name\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"checklist_html\",\n", - " \"answers\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\"\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######## Radio Classification ######\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_html\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nradio_annotation_ndjson = {\n \"name\": \"radio_html\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######## 
Radio Classification ######\n", - "\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_html\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\")),\n", - ")\n", - "\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"radio_html\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\"\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classification - Radio and Checklist (with subclassifcations) ##########\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", + 
"cell_type": "code", "outputs": [], - "source": [ - "########## Classification - Radio and Checklist (with subclassifcations) ##########\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_annotation_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - "}\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " },\n", - " }],\n", - " }],\n", - "}" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ 
"## Upload Annotations - putting it all together " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create one Labelbox dataset\n\nglobal_key = \"sample_html_1.html\" + str(uuid.uuid4())\n\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_1.html\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(\n name=\"html_annotation_import_demo_dataset\",\n iam_integration=\n None, # Removing this argument will default to the organziation's default iam integration\n)\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create one Labelbox dataset\n", - "\n", - "global_key = \"sample_html_1.html\" + str(uuid.uuid4())\n", - "\n", - "asset = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_1.html\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(\n", - " name=\"html_annotation_import_demo_dataset\",\n", - " iam_integration=\n", - " None, # Removing this argument will default to the organziation's default iam integration\n", - ")\n", - "task = dataset.create_data_rows([asset])\n", - "task.wait_till_done()\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows: \", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an ontology\n", @@ -314,271 +172,135 @@ "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification 
instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we create the text annotation, we provided the `name` as `text_html`. Now, when we setup our ontology, we must ensure that the name of the tool is also `text_html`. The same alignment must hold true for the other tools and classifications we create in our ontology." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT, name=\"text_html\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_html\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_html\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n )\n ],\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology HTML Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Html,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = 
lb.OntologyBuilder(classifications=[\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT, name=\"text_html\"),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_html\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_html\",\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", - " ),\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - "])\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Ontology HTML Annotations\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Html,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", "## Step 3: Create a labeling project\n", "Connect the ontology to the labeling project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + 
"source": "# Create Labelbox project\nproject = client.create_project(name=\"HTML Import Annotation Demo\",\n media_type=lb.MediaType.Html)\n\n# Setup your ontology\nproject.setup_editor(\n ontology) # Connect your ontology and editor to your project", + "cell_type": "code", "outputs": [], - "source": [ - "# Create Labelbox project\n", - "project = client.create_project(name=\"HTML Import Annotation Demo\",\n", - " media_type=lb.MediaType.Html)\n", - "\n", - "# Setup your ontology\n", - "project.setup_editor(\n", - " ontology) # Connect your ontology and editor to your project" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send a batch of data rows to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Setup Batches and Ontology\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-html-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", + "cell_type": "code", "outputs": [], - "source": [ - "# Setup Batches and Ontology\n", - "\n", - "# Create a batch to send to your MAL project\n", - "batch = project.create_batch(\n", - " \"first-batch-html-demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "print(\"Batch: \", batch)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the annotations payload\n", "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the 
annotations payload: NDJSON and Python Annotation types." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label = []\n", - "label.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " text_annotation,\n", - " checklist_annotation,\n", - " radio_annotation,\n", - " nested_checklist_annotation,\n", - " nested_radio_annotation,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "### NDJSON annotations \n", "Here we create the complete label NDJSON payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1)." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\nfor annotations in [\n text_annotation_ndjson,\n checklist_annotation_ndjson,\n radio_annotation_ndjson,\n nested_radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "for annotations in [\n", - " text_annotation_ndjson,\n", - " checklist_annotation_ndjson,\n", - " radio_annotation_ndjson,\n", - " nested_radio_annotation_ndjson,\n", - " nested_checklist_annotation_ndjson,\n", - "]:\n", - " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson.append(annotations)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or complete labels" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Assisted Labeling (MAL)\n", "For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload our label using Model-Assisted Labeling\n", - "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=f\"mal_job-{str(uuid.uuid4())}\",\n", - " predictions=label,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload label for this data row in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload label for this data row in project\n", - "upload_job = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"label_import_job\" + str(uuid.uuid4()),\n", - " labels=label,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", 
upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/examples/annotation_import/image.ipynb b/examples/annotation_import/image.ipynb index be2bf6611..90ecf2123 100644 --- a/examples/annotation_import/image.ipynb +++ b/examples/annotation_import/image.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Image annotation import\n", @@ -57,969 +59,294 @@ "- Ability to create the payload in the NDJSON import format directly\n", "\n", "- It supports any levels of nested classification (free text / radio / checklist) under the object or classification annotation." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import uuid\nfrom PIL import Image\nimport requests\nimport base64\nimport labelbox as lb\nimport labelbox.types as lb_types\nfrom io import BytesIO", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "from PIL import Image\n", - "import requests\n", - "import base64\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "from io import BytesIO" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API key\n", "\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for image\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification : Radio (single-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": 
\"radio_question\",\n \"answer\": {\n \"name\": \"second_radio_answer\"\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\")),\n", - ")\n", - "\n", - "# NDJSON\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"second_radio_answer\"\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Checklist (multi-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\", # must match your ontology feature\"s name\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"checklist_question\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": 
\"second_checklist_answer\"\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Nested radio and checklist\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "nested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n# NDJSON\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " 
classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "# NDJSON\n", - "nested_radio_annotation_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - "}\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " },\n", - " }],\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Free-form text" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\"),\n)\n\n# 
NDJSON\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature\"s name\n", - " value=lb_types.Text(answer=\"sample text\"),\n", - ")\n", - "\n", - "# NDJSON\n", - "text_annotation_ndjson = {\n", - " \"name\": \"free_text\",\n", - " \"answer\": \"sample text\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Relationship with bounding box\n", "> **NOTE:** \n", "> Only supported for MAL imports" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Annotation\nbbox_source = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=2096, y=1264),\n end=lb_types.Point(x=2240, y=1689),\n ),\n)\n\nbbox_target = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=2272, y=1346),\n end=lb_types.Point(x=2416, y=1704),\n ),\n)\n\nrelationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=bbox_source,\n target=bbox_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source = str(uuid.uuid4())\nuuid_target = str(uuid.uuid4())\n\nbbox_source_ndjson = {\n \"uuid\": uuid_source,\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 1264.0,\n \"left\": 2096.0,\n \"height\": 425.0,\n \"width\": 144.0\n },\n}\n\nbbox_target_ndjson = {\n \"uuid\": uuid_target,\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 1346.0,\n \"left\": 2272.0,\n \"height\": 358.0,\n \"width\": 144.0\n },\n}\n\nrelationship_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source,\n \"target\": 
uuid_target,\n \"type\": \"unidirectional\",\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Annotation\n", - "bbox_source = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=2096, y=1264),\n", - " end=lb_types.Point(x=2240, y=1689),\n", - " ),\n", - ")\n", - "\n", - "bbox_target = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=2272, y=1346),\n", - " end=lb_types.Point(x=2416, y=1704),\n", - " ),\n", - ")\n", - "\n", - "relationship = lb_types.RelationshipAnnotation(\n", - " name=\"relationship\",\n", - " value=lb_types.Relationship(\n", - " source=bbox_source,\n", - " target=bbox_target,\n", - " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ),\n", - ")\n", - "\n", - "## Only supported for MAL imports\n", - "uuid_source = str(uuid.uuid4())\n", - "uuid_target = str(uuid.uuid4())\n", - "\n", - "bbox_source_ndjson = {\n", - " \"uuid\": uuid_source,\n", - " \"name\": \"bounding_box\",\n", - " \"bbox\": {\n", - " \"top\": 1264.0,\n", - " \"left\": 2096.0,\n", - " \"height\": 425.0,\n", - " \"width\": 144.0\n", - " },\n", - "}\n", - "\n", - "bbox_target_ndjson = {\n", - " \"uuid\": uuid_target,\n", - " \"name\": \"bounding_box\",\n", - " \"bbox\": {\n", - " \"top\": 1346.0,\n", - " \"left\": 2272.0,\n", - " \"height\": 358.0,\n", - " \"width\": 144.0\n", - " },\n", - "}\n", - "\n", - "relationship_ndjson = {\n", - " \"name\": \"relationship\",\n", - " \"relationship\": {\n", - " \"source\": uuid_source,\n", - " \"target\": uuid_target,\n", - " \"type\": \"unidirectional\",\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\nbbox_annotation = lb_types.ObjectAnnotation(\n 
name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\n# NDJSON\nbbox_annotation_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 977,\n \"left\": 1690,\n \"height\": 330,\n \"width\": 225\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\", # must match your ontology feature\"s name\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", - " end=lb_types.Point(x=1915,\n", - " y=1307), # x= left + width , y = top + height\n", - " ),\n", - ")\n", - "\n", - "# NDJSON\n", - "bbox_annotation_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"bbox\": {\n", - " \"top\": 977,\n", - " \"left\": 1690,\n", - " \"height\": 330,\n", - " \"width\": 225\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box with nested classification" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n)\n\n## NDJSON\nbbox_with_radio_subclass_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n 
}],\n \"bbox\": {\n \"top\": 933,\n \"left\": 541,\n \"height\": 191,\n \"width\": 330\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", - " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "## NDJSON\n", - "bbox_with_radio_subclass_ndjson = {\n", - " \"name\": \"bbox_with_radio_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\"\n", - " },\n", - " }],\n", - " \"bbox\": {\n", - " \"top\": 933,\n", - " \"left\": 541,\n", - " \"height\": 191,\n", - " \"width\": 330\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Polygon" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon\", # must match your ontology feature\"s name\n value=lb_types.Polygon( # Coordinates for the vertices of your polygon\n points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n 
lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\n# NDJSON\npolygon_annotation_ndjson = {\n \"name\":\n \"polygon\",\n \"polygon\": [\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n {\n \"x\": 2278.306,\n \"y\": 256.885\n },\n {\n \"x\": 2428.197,\n \"y\": 200.437\n },\n {\n \"x\": 2560.0,\n \"y\": 335.419\n },\n {\n \"x\": 2557.386,\n \"y\": 503.165\n },\n {\n \"x\": 2320.596,\n \"y\": 503.103\n },\n {\n \"x\": 2156.083,\n \"y\": 628.943\n },\n {\n \"x\": 2161.111,\n \"y\": 785.519\n },\n {\n \"x\": 2002.115,\n \"y\": 894.647\n },\n {\n \"x\": 1838.456,\n \"y\": 877.874\n },\n {\n \"x\": 1436.53,\n \"y\": 874.636\n },\n {\n \"x\": 1411.403,\n \"y\": 758.579\n },\n {\n \"x\": 1353.853,\n \"y\": 751.74\n },\n {\n \"x\": 1345.264,\n \"y\": 453.461\n },\n {\n \"x\": 1426.011,\n \"y\": 421.129\n },\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "polygon_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polygon\", # must match your ontology feature\"s name\n", - " value=lb_types.Polygon( # Coordinates for the vertices of your polygon\n", - " points=[\n", - " lb_types.Point(x=1489.581, y=183.934),\n", - " lb_types.Point(x=2278.306, y=256.885),\n", - " lb_types.Point(x=2428.197, y=200.437),\n", - " lb_types.Point(x=2560.0, y=335.419),\n", - " lb_types.Point(x=2557.386, y=503.165),\n", - " lb_types.Point(x=2320.596, y=503.103),\n", - " lb_types.Point(x=2156.083, y=628.943),\n", - " lb_types.Point(x=2161.111, y=785.519),\n", - " lb_types.Point(x=2002.115, y=894.647),\n", - " lb_types.Point(x=1838.456, y=877.874),\n", - " lb_types.Point(x=1436.53, y=874.636),\n", - " lb_types.Point(x=1411.403, y=758.579),\n", - " lb_types.Point(x=1353.853, y=751.74),\n", - " lb_types.Point(x=1345.264, y=453.461),\n", - " 
lb_types.Point(x=1426.011, y=421.129),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "polygon_annotation_ndjson = {\n", - " \"name\":\n", - " \"polygon\",\n", - " \"polygon\": [\n", - " {\n", - " \"x\": 1489.581,\n", - " \"y\": 183.934\n", - " },\n", - " {\n", - " \"x\": 2278.306,\n", - " \"y\": 256.885\n", - " },\n", - " {\n", - " \"x\": 2428.197,\n", - " \"y\": 200.437\n", - " },\n", - " {\n", - " \"x\": 2560.0,\n", - " \"y\": 335.419\n", - " },\n", - " {\n", - " \"x\": 2557.386,\n", - " \"y\": 503.165\n", - " },\n", - " {\n", - " \"x\": 2320.596,\n", - " \"y\": 503.103\n", - " },\n", - " {\n", - " \"x\": 2156.083,\n", - " \"y\": 628.943\n", - " },\n", - " {\n", - " \"x\": 2161.111,\n", - " \"y\": 785.519\n", - " },\n", - " {\n", - " \"x\": 2002.115,\n", - " \"y\": 894.647\n", - " },\n", - " {\n", - " \"x\": 1838.456,\n", - " \"y\": 877.874\n", - " },\n", - " {\n", - " \"x\": 1436.53,\n", - " \"y\": 874.636\n", - " },\n", - " {\n", - " \"x\": 1411.403,\n", - " \"y\": 758.579\n", - " },\n", - " {\n", - " \"x\": 1353.853,\n", - " \"y\": 751.74\n", - " },\n", - " {\n", - " \"x\": 1345.264,\n", - " \"y\": 453.461\n", - " },\n", - " {\n", - " \"x\": 1426.011,\n", - " \"y\": 421.129\n", - " },\n", - " {\n", - " \"x\": 1489.581,\n", - " \"y\": 183.934\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Composite mask upload using different mask tools from the project's ontology\n", "This example shows how to assigned different annotations (mask instances) from a composite mask using different mask tools" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# First we need to extract all the unique colors from the composite mask\ndef extract_rgb_colors_from_url(image_url):\n response = requests.get(image_url)\n img = Image.open(BytesIO(response.content))\n\n colors = set()\n for x in range(img.width):\n for y in 
range(img.height):\n pixel = img.getpixel((x, y))\n if pixel[:3] != (0, 0, 0):\n colors.add(pixel[:3]) # Get only the RGB values\n\n return colors", + "cell_type": "code", "outputs": [], - "source": [ - "# First we need to extract all the unique colors from the composite mask\n", - "def extract_rgb_colors_from_url(image_url):\n", - " response = requests.get(image_url)\n", - " img = Image.open(BytesIO(response.content))\n", - "\n", - " colors = set()\n", - " for x in range(img.width):\n", - " for y in range(img.height):\n", - " pixel = img.getpixel((x, y))\n", - " if pixel[:3] != (0, 0, 0):\n", - " colors.add(pixel[:3]) # Get only the RGB values\n", - "\n", - " return colors" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "cp_mask_url = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/composite_mask.png\"\ncolors = extract_rgb_colors_from_url(cp_mask_url)\nresponse = requests.get(cp_mask_url)\n\nmask_data = lb.types.MaskData(\n im_bytes=response.content\n) # You can also use \"url\" instead of img_bytes to pass the PNG mask url.\nrgb_colors_for_mask_with_text_subclass_tool = [\n (73, 39, 85),\n (111, 87, 176),\n (23, 169, 254),\n]\n\ncp_mask = []\nfor color in colors:\n # We are assigning the color related to the mask_with_text_subclass tool by identifying the unique RGB colors\n if color in rgb_colors_for_mask_with_text_subclass_tool:\n cp_mask.append(\n lb_types.ObjectAnnotation(\n name=\n \"mask_with_text_subclass\", # must match your ontology feature\"s name\n value=lb_types.Mask(mask=mask_data, color=color),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_free_text\",\n value=lb_types.Text(answer=\"free text answer sample\"),\n )\n ],\n ))\n else:\n # Create ObjectAnnotation for other masks\n cp_mask.append(\n lb_types.ObjectAnnotation(name=\"mask\",\n value=lb_types.Mask(mask=mask_data,\n color=color)))\n\n# NDJSON using bytes array\ncp_mask_ndjson = 
[]\n\n# Using bytes array.\nresponse = requests.get(cp_mask_url)\nim_bytes = base64.b64encode(response.content).decode(\"utf-8\")\nfor color in colors:\n if color in rgb_colors_for_mask_with_text_subclass_tool:\n cp_mask_ndjson.append({\n \"name\":\n \"mask_with_text_subclass\",\n \"mask\": {\n \"imBytes\": im_bytes,\n \"colorRGB\": color\n },\n \"classifications\": [{\n \"name\": \"sub_free_text\",\n \"answer\": \"free text answer\"\n }],\n })\n else:\n cp_mask_ndjson.append({\n \"name\": \"mask\",\n \"classifications\": [],\n \"mask\": {\n \"imBytes\": im_bytes,\n \"colorRGB\": color\n },\n })", + "cell_type": "code", "outputs": [], - "source": [ - "cp_mask_url = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/composite_mask.png\"\n", - "colors = extract_rgb_colors_from_url(cp_mask_url)\n", - "response = requests.get(cp_mask_url)\n", - "\n", - "mask_data = lb.types.MaskData(\n", - " im_bytes=response.content\n", - ") # You can also use \"url\" instead of img_bytes to pass the PNG mask url.\n", - "rgb_colors_for_mask_with_text_subclass_tool = [\n", - " (73, 39, 85),\n", - " (111, 87, 176),\n", - " (23, 169, 254),\n", - "]\n", - "\n", - "cp_mask = []\n", - "for color in colors:\n", - " # We are assigning the color related to the mask_with_text_subclass tool by identifying the unique RGB colors\n", - " if color in rgb_colors_for_mask_with_text_subclass_tool:\n", - " cp_mask.append(\n", - " lb_types.ObjectAnnotation(\n", - " name=\n", - " \"mask_with_text_subclass\", # must match your ontology feature\"s name\n", - " value=lb_types.Mask(mask=mask_data, color=color),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_free_text\",\n", - " value=lb_types.Text(answer=\"free text answer sample\"),\n", - " )\n", - " ],\n", - " ))\n", - " else:\n", - " # Create ObjectAnnotation for other masks\n", - " cp_mask.append(\n", - " lb_types.ObjectAnnotation(name=\"mask\",\n", - " value=lb_types.Mask(mask=mask_data,\n", - " 
color=color)))\n", - "\n", - "# NDJSON using bytes array\n", - "cp_mask_ndjson = []\n", - "\n", - "# Using bytes array.\n", - "response = requests.get(cp_mask_url)\n", - "im_bytes = base64.b64encode(response.content).decode(\"utf-8\")\n", - "for color in colors:\n", - " if color in rgb_colors_for_mask_with_text_subclass_tool:\n", - " cp_mask_ndjson.append({\n", - " \"name\":\n", - " \"mask_with_text_subclass\",\n", - " \"mask\": {\n", - " \"imBytes\": im_bytes,\n", - " \"colorRGB\": color\n", - " },\n", - " \"classifications\": [{\n", - " \"name\": \"sub_free_text\",\n", - " \"answer\": \"free text answer\"\n", - " }],\n", - " })\n", - " else:\n", - " cp_mask_ndjson.append({\n", - " \"name\": \"mask\",\n", - " \"classifications\": [],\n", - " \"mask\": {\n", - " \"imBytes\": im_bytes,\n", - " \"colorRGB\": color\n", - " },\n", - " })" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Point" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point\", # must match your ontology feature\"s name\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\n# NDJSON\npoint_annotation_ndjson = {\n \"name\": \"point\",\n \"classifications\": [],\n \"point\": {\n \"x\": 1166.606,\n \"y\": 1441.768\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "point_annotation = lb_types.ObjectAnnotation(\n", - " name=\"point\", # must match your ontology feature\"s name\n", - " value=lb_types.Point(x=1166.606, y=1441.768),\n", - ")\n", - "\n", - "# NDJSON\n", - "point_annotation_ndjson = {\n", - " \"name\": \"point\",\n", - " \"classifications\": [],\n", - " \"point\": {\n", - " \"x\": 1166.606,\n", - " \"y\": 1441.768\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Polyline" - ] + ], + "cell_type": 
"markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\npolyline_annotation = lb_types.ObjectAnnotation(\n name=\"polyline\", # must match your ontology feature\"s name\n value=lb_types.Line( # Coordinates for the keypoints in your polyline\n points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)\n\n# NDJSON\npolyline_annotation_ndjson = {\n \"name\":\n \"polyline\",\n \"classifications\": [],\n \"line\": [\n {\n \"x\": 2534.353,\n \"y\": 249.471\n },\n {\n \"x\": 2429.492,\n \"y\": 182.092\n },\n {\n \"x\": 2294.322,\n \"y\": 221.962\n },\n {\n \"x\": 2224.491,\n \"y\": 180.463\n },\n {\n \"x\": 2136.123,\n \"y\": 204.716\n },\n {\n \"x\": 1712.247,\n \"y\": 173.949\n },\n {\n \"x\": 1703.838,\n \"y\": 84.438\n },\n {\n \"x\": 1579.772,\n \"y\": 82.61\n },\n {\n \"x\": 1583.442,\n \"y\": 167.552\n },\n {\n \"x\": 1478.869,\n \"y\": 164.903\n },\n {\n \"x\": 1418.941,\n \"y\": 318.149\n },\n {\n \"x\": 1243.128,\n \"y\": 400.815\n },\n {\n \"x\": 1022.067,\n \"y\": 319.007\n },\n {\n \"x\": 892.367,\n \"y\": 379.216\n },\n {\n \"x\": 670.273,\n \"y\": 364.408\n },\n {\n \"x\": 613.114,\n \"y\": 288.16\n },\n {\n 
\"x\": 377.559,\n \"y\": 238.251\n },\n {\n \"x\": 368.087,\n \"y\": 185.064\n },\n {\n \"x\": 246.557,\n \"y\": 167.286\n },\n {\n \"x\": 236.648,\n \"y\": 285.61\n },\n {\n \"x\": 90.929,\n \"y\": 326.412\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "polyline_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polyline\", # must match your ontology feature\"s name\n", - " value=lb_types.Line( # Coordinates for the keypoints in your polyline\n", - " points=[\n", - " lb_types.Point(x=2534.353, y=249.471),\n", - " lb_types.Point(x=2429.492, y=182.092),\n", - " lb_types.Point(x=2294.322, y=221.962),\n", - " lb_types.Point(x=2224.491, y=180.463),\n", - " lb_types.Point(x=2136.123, y=204.716),\n", - " lb_types.Point(x=1712.247, y=173.949),\n", - " lb_types.Point(x=1703.838, y=84.438),\n", - " lb_types.Point(x=1579.772, y=82.61),\n", - " lb_types.Point(x=1583.442, y=167.552),\n", - " lb_types.Point(x=1478.869, y=164.903),\n", - " lb_types.Point(x=1418.941, y=318.149),\n", - " lb_types.Point(x=1243.128, y=400.815),\n", - " lb_types.Point(x=1022.067, y=319.007),\n", - " lb_types.Point(x=892.367, y=379.216),\n", - " lb_types.Point(x=670.273, y=364.408),\n", - " lb_types.Point(x=613.114, y=288.16),\n", - " lb_types.Point(x=377.559, y=238.251),\n", - " lb_types.Point(x=368.087, y=185.064),\n", - " lb_types.Point(x=246.557, y=167.286),\n", - " lb_types.Point(x=236.648, y=285.61),\n", - " lb_types.Point(x=90.929, y=326.412),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "polyline_annotation_ndjson = {\n", - " \"name\":\n", - " \"polyline\",\n", - " \"classifications\": [],\n", - " \"line\": [\n", - " {\n", - " \"x\": 2534.353,\n", - " \"y\": 249.471\n", - " },\n", - " {\n", - " \"x\": 2429.492,\n", - " \"y\": 182.092\n", - " },\n", - " {\n", - " \"x\": 2294.322,\n", - " \"y\": 221.962\n", - " },\n", - " {\n", - " \"x\": 2224.491,\n", - " \"y\": 180.463\n", - " },\n", - " {\n", - " \"x\": 2136.123,\n", - " \"y\": 
204.716\n", - " },\n", - " {\n", - " \"x\": 1712.247,\n", - " \"y\": 173.949\n", - " },\n", - " {\n", - " \"x\": 1703.838,\n", - " \"y\": 84.438\n", - " },\n", - " {\n", - " \"x\": 1579.772,\n", - " \"y\": 82.61\n", - " },\n", - " {\n", - " \"x\": 1583.442,\n", - " \"y\": 167.552\n", - " },\n", - " {\n", - " \"x\": 1478.869,\n", - " \"y\": 164.903\n", - " },\n", - " {\n", - " \"x\": 1418.941,\n", - " \"y\": 318.149\n", - " },\n", - " {\n", - " \"x\": 1243.128,\n", - " \"y\": 400.815\n", - " },\n", - " {\n", - " \"x\": 1022.067,\n", - " \"y\": 319.007\n", - " },\n", - " {\n", - " \"x\": 892.367,\n", - " \"y\": 379.216\n", - " },\n", - " {\n", - " \"x\": 670.273,\n", - " \"y\": 364.408\n", - " },\n", - " {\n", - " \"x\": 613.114,\n", - " \"y\": 288.16\n", - " },\n", - " {\n", - " \"x\": 377.559,\n", - " \"y\": 238.251\n", - " },\n", - " {\n", - " \"x\": 368.087,\n", - " \"y\": 185.064\n", - " },\n", - " {\n", - " \"x\": 246.557,\n", - " \"y\": 167.286\n", - " },\n", - " {\n", - " \"x\": 236.648,\n", - " \"y\": 285.61\n", - " },\n", - " {\n", - " \"x\": 90.929,\n", - " \"y\": 326.412\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# End-to-end example: Import pre-labels or ground truth" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into catalog\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# send a sample image as batch to the project\nglobal_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\" + str(uuid.uuid4())\n\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"image-demo-dataset\")\ntask = 
dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "# send a sample image as batch to the project\n", - "global_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\" + str(uuid.uuid4())\n", - "\n", - "test_img_url = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"image-demo-dataset\")\n", - "task = dataset.create_data_rows([test_img_url])\n", - "task.wait_till_done()\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "print(dataset)", + "cell_type": "code", "outputs": [], - "source": [ - "print(dataset)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an ontology\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we created the bounding box annotation above, we provided the `name` as `bounding_box`. Now, when we setup our ontology, we must ensure that the name of the bounding box tool is also `bounding_box`. The same alignment must hold true for the other tools and classifications we create in our ontology." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"mask\"),\n lb.Tool(\n tool=lb.Tool.Type.RASTER_SEGMENTATION,\n name=\"mask_with_text_subclass\",\n classifications=[\n 
lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"sub_free_text\")\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Image Annotation Import Demo Ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\"),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " 
options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " tools=[ # List of Tool objects\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", - " ),\n", - " ],\n", - " ),\n", - " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n", - " lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"mask\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.RASTER_SEGMENTATION,\n", - " name=\"mask_with_text_subclass\",\n", - " classifications=[\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"sub_free_text\")\n", - " ],\n", - " ),\n", - " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n", - " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n", - " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Image Annotation Import Demo Ontology\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Image,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a labeling project\n", "Connect the ontology to the labeling project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\nproject = client.create_project(name=\"Image Annotation Import Demo\",\n media_type=lb.MediaType.Image)\n\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Project 
defaults to batch mode with benchmark quality settings if this argument is not provided\n", - "# Queue mode will be deprecated once dataset mode is deprecated\n", - "project = client.create_project(name=\"Image Annotation Import Demo\",\n", - " media_type=lb.MediaType.Image)\n", - "\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send a batch of data rows to the project\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "batch = project.create_batch(\n \"image-demo-batch\", # each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # paginated collection of data row objects, list of data row ids or global keys\n priority=1, # priority between 1(highest) - 5(lowest)\n)\n\nprint(f\"Batch: {batch}\")", + "cell_type": "code", "outputs": [], - "source": [ - "batch = project.create_batch(\n", - " \"image-demo-batch\", # each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # paginated collection of data row objects, list of data row ids or global keys\n", - " priority=1, # priority between 1(highest) - 5(lowest)\n", - ")\n", - "\n", - "print(f\"Batch: {batch}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the annotations payload\n", @@ -1027,156 +354,82 @@ "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below. If you are using Python Annotation types, compose your annotations into Labels attached to the data rows." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Python annotations\n", "\n", "Here we create the complete label ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label = []\nannotations = [\n radio_annotation,\n nested_radio_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n polygon_annotation,\n point_annotation,\n polyline_annotation,\n bbox_source,\n bbox_target,\n relationship,\n] + cp_mask\n\nlabel.append(\n lb_types.Label(data={\"global_key\": global_key}, annotations=annotations))", + "cell_type": "code", "outputs": [], - "source": [ - "label = []\n", - "annotations = [\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " checklist_annotation,\n", - " nested_checklist_annotation,\n", - " text_annotation,\n", - " bbox_annotation,\n", - " bbox_with_radio_subclass_annotation,\n", - " polygon_annotation,\n", - " point_annotation,\n", - " polyline_annotation,\n", - " bbox_source,\n", - " bbox_target,\n", - " relationship,\n", - "] + cp_mask\n", - "\n", - "label.append(\n", - " lb_types.Label(data={\"global_key\": global_key}, annotations=annotations))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### NDJSON annotations\n", "Here we create the complete label ndjson payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created above." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\nannotations = [\n radio_annotation_ndjson,\n nested_radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n checklist_annotation_ndjson,\n text_annotation_ndjson,\n bbox_annotation_ndjson,\n bbox_with_radio_subclass_ndjson,\n polygon_annotation_ndjson,\n point_annotation_ndjson,\n polyline_annotation_ndjson,\n bbox_source_ndjson,\n bbox_target_ndjson,\n relationship_ndjson, ## Only supported for MAL imports\n] + cp_mask_ndjson\n\nfor annotation in annotations:\n annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotation)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "annotations = [\n", - " radio_annotation_ndjson,\n", - " nested_radio_annotation_ndjson,\n", - " nested_checklist_annotation_ndjson,\n", - " checklist_annotation_ndjson,\n", - " text_annotation_ndjson,\n", - " bbox_annotation_ndjson,\n", - " bbox_with_radio_subclass_ndjson,\n", - " polygon_annotation_ndjson,\n", - " point_annotation_ndjson,\n", - " polyline_annotation_ndjson,\n", - " bbox_source_ndjson,\n", - " bbox_target_ndjson,\n", - " relationship_ndjson, ## Only supported for MAL imports\n", - "] + cp_mask_ndjson\n", - "\n", - "for annotation in annotations:\n", - " annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson.append(annotation)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6: Upload annotations to a project as pre-labels or ground truth\n", "For the purpose of this tutorial only import one of the annotations payloads at the time (NDJSON or Python annotation types)." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Option A: Upload to a labeling project as pre-labels (MAL)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# upload MAL labels for this data row in project\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"mal_job\" + str(uuid.uuid4()),\n predictions=label,\n)\nupload_job.wait_until_done()\n\nprint(f\"Errors: {upload_job.errors}\")\nprint(f\"Status of uploads: {upload_job.statuses}\")", + "cell_type": "code", "outputs": [], - "source": [ - "# upload MAL labels for this data row in project\n", - "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"mal_job\" + str(uuid.uuid4()),\n", - " predictions=label,\n", - ")\n", - "upload_job.wait_until_done()\n", - "\n", - "print(f\"Errors: {upload_job.errors}\")\n", - "print(f\"Status of uploads: {upload_job.statuses}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Option B: Upload to a labeling project using ground truth" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Relationships are not supported with LabelImport\n# For this demo either run MAL or Ground Truth, not both\n\n# Upload label for this data row in project\n# upload_job = lb.LabelImport.create_from_objects(\n# client = client,\n# project_id = project.uid,\n# name=\"label_import_job\"+str(uuid.uuid4()),\n# labels=label)\n\n# print(\"Errors:\", upload_job.errors)\n# print(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Relationships are not supported with LabelImport\n", - "# For this demo either run MAL or Ground Truth, not both\n", - "\n", - "# Upload label for this data row in project\n", 
- "# upload_job = lb.LabelImport.create_from_objects(\n", - "# client = client,\n", - "# project_id = project.uid,\n", - "# name=\"label_import_job\"+str(uuid.uuid4()),\n", - "# labels=label)\n", - "\n", - "# print(\"Errors:\", upload_job.errors)\n", - "# print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/examples/annotation_import/pdf.ipynb b/examples/annotation_import/pdf.ipynb index 9143b0620..1a9f1fc50 100644 --- a/examples/annotation_import/pdf.ipynb +++ b/examples/annotation_import/pdf.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 1, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,17 +24,17 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Annotation Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -54,538 +56,137 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, 
"metadata": {}, + "source": "import uuid\nimport json\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "import json\n", - "import requests\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key\n", "Guides on https://docs.labelbox.com/docs/create-an-api-key" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Supported Annotations" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Entity ##########\n\n# Annotation Types\nentities_annotations = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_annotations_ndjson = {\n \"name\":\n \"named_entity\",\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n \"groupId\": \"\",\n \"page\": 1,\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Entity ##########\n", - "\n", - "# Annotation Types\n", - "entities_annotations = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "# NDJSON\n", - "entities_annotations_ndjson = 
{\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\",],\n", - " \"groupId\": \"\",\n", - " \"page\": 1,\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########### Radio Classification #########\n\n# Annotation types\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########### Radio Classification #########\n", - "\n", - "# Annotation types\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - ")\n", - "# NDJSON\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\"\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Checklist Classification ###########\n", - "\n", - "# Annotation types\n", - 
"checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"checklist_question\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\"\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ Bounding Box ###########\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n end=lb_types.Point(x=518.571,\n y=245.143), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_annotation_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 135.3,\n \"left\": 102.771,\n \"height\": 109.843,\n \"width\": 415.8\n },\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Bounding Box ###########\n", - "\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\", # must match your ontology feature\"s name\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n", - " end=lb_types.Point(x=518.571,\n", - " y=245.143), # x= left + width , y = top + height\n", - " page=0,\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " ),\n", - ")\n", - "\n", - "bbox_annotation_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"bbox\": {\n", - " \"top\": 135.3,\n", - " \"left\": 102.771,\n", - " \"height\": 109.843,\n", 
- " \"width\": 415.8\n", - " },\n", - " \"page\": 0,\n", - " \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# ############ global nested classifications ###########\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# ############ global nested classifications ###########\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " 
name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " },\n", - " }],\n", - " }],\n", - "}\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_annotation_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############## Classification Free-form text ##############\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\"),\n)\n\ntext_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}", + "cell_type": "code", 
"outputs": [], - "source": [ - "############## Classification Free-form text ##############\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature\"s name\n", - " value=lb_types.Text(answer=\"sample text\"),\n", - ")\n", - "\n", - "text_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### BBOX with nested classifications #########\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n end=lb_types.Point(x=566.657,\n y=420.986), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_annotation_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\"\n },\n }],\n },\n }],\n \"bbox\": {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### BBOX with nested classifications #########\n", - "\n", - "bbox_with_radio_subclass_annotation = 
lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n", - " end=lb_types.Point(x=566.657,\n", - " y=420.986), # x= left + width , y = top + height\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"second_sub_radio_question\",\n", - " value=lb_types.Radio(\n", - " answer=lb_types.ClassificationAnswer(\n", - " name=\"second_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "bbox_with_radio_subclass_annotation_ndjson = {\n", - " \"name\": \"bbox_with_radio_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"second_sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"second_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - " }],\n", - " \"bbox\": {\n", - " \"top\": 226.757,\n", - " \"left\": 317.271,\n", - " \"height\": 194.229,\n", - " \"width\": 249.386,\n", - " },\n", - " \"page\": 1,\n", - " \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ NER with nested classifications ########\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n text_selections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n classifications=[\n 
lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation_ndjson = {\n \"name\":\n \"ner_with_checklist_subclass\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": [{\n \"name\": \"first_sub_checklist_answer\"\n }],\n }],\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ NER with nested classifications ########\n", - "\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"ner_with_checklist_subclass\",\n", - " text_selections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "ner_with_checklist_subclass_annotation_ndjson = {\n", - " \"name\":\n", - " \"ner_with_checklist_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " }],\n", - " }],\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\"],\n", - " \"groupId\": \"\",\n", - " \"page\": 1\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### Relationships ##########\nentity_source = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n 
lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_target = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=entity_source,\n target=entity_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source = str(uuid.uuid4())\nuuid_target = str(uuid.uuid4())\n\nentity_source_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_source,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\n\nentity_target_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_target,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\nner_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source,\n \"target\": uuid_target,\n \"type\": \"unidirectional\",\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### Relationships ##########\n", - "entity_source = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "entity_target = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "entity_relationship = lb_types.RelationshipAnnotation(\n", - " name=\"relationship\",\n", - " value=lb_types.Relationship(\n", - " 
source=entity_source,\n", - " target=entity_target,\n", - " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ),\n", - ")\n", - "\n", - "## Only supported for MAL imports\n", - "uuid_source = str(uuid.uuid4())\n", - "uuid_target = str(uuid.uuid4())\n", - "\n", - "entity_source_ndjson = {\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"uuid\":\n", - " uuid_source,\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\"],\n", - " \"groupId\": \"\",\n", - " \"page\": 1\n", - " }],\n", - "}\n", - "\n", - "entity_target_ndjson = {\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"uuid\":\n", - " uuid_target,\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\"],\n", - " \"groupId\": \"\",\n", - " \"page\": 1\n", - " }],\n", - "}\n", - "ner_relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\",\n", - " \"relationship\": {\n", - " \"source\": uuid_source,\n", - " \"target\": uuid_target,\n", - " \"type\": \"unidirectional\",\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### BBOX with relationships #############\n# Python Annotation\nbbox_source = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n end=lb_types.Point(x=270.907,\n y=149.556), # x = left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_target = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=96.424, y=66.251),\n end=lb_types.Point(x=179.074, y=146.932),\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=bbox_source,\n target=bbox_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source_2 
= str(uuid.uuid4())\nuuid_target_2 = str(uuid.uuid4())\n\nbbox_source_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": uuid_source_2,\n \"bbox\": {\n \"top\": 68.875,\n \"left\": 188.257,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_target_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": uuid_target_2,\n \"bbox\": {\n \"top\": 66.251,\n \"left\": 96.424,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source_2,\n \"target\": uuid_target_2,\n \"type\": \"unidirectional\",\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### BBOX with relationships #############\n", - "# Python Annotation\n", - "bbox_source = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n", - " end=lb_types.Point(x=270.907,\n", - " y=149.556), # x = left + width , y = top + height\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - ")\n", - "\n", - "bbox_target = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=96.424, y=66.251),\n", - " end=lb_types.Point(x=179.074, y=146.932),\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - ")\n", - "\n", - "bbox_relationship = lb_types.RelationshipAnnotation(\n", - " name=\"relationship\",\n", - " value=lb_types.Relationship(\n", - " source=bbox_source,\n", - " target=bbox_target,\n", - " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ),\n", - ")\n", - "\n", - "## Only supported for MAL imports\n", - "uuid_source_2 = str(uuid.uuid4())\n", - "uuid_target_2 = str(uuid.uuid4())\n", - "\n", - "bbox_source_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"uuid\": 
uuid_source_2,\n", - " \"bbox\": {\n", - " \"top\": 68.875,\n", - " \"left\": 188.257,\n", - " \"height\": 80.681,\n", - " \"width\": 82.65\n", - " },\n", - " \"page\": 1,\n", - " \"unit\": \"POINTS\",\n", - "}\n", - "\n", - "bbox_target_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"uuid\": uuid_target_2,\n", - " \"bbox\": {\n", - " \"top\": 66.251,\n", - " \"left\": 96.424,\n", - " \"height\": 80.681,\n", - " \"width\": 82.65\n", - " },\n", - " \"page\": 1,\n", - " \"unit\": \"POINTS\",\n", - "}\n", - "\n", - "bbox_relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\",\n", - " \"relationship\": {\n", - " \"source\": uuid_source_2,\n", - " \"target\": uuid_target_2,\n", - " \"type\": \"unidirectional\",\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Passing a `text_layer_url` is not longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", @@ -599,206 +200,60 @@ "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. 
As a result, all tokens in the text layer are also rotated by 90 degrees.\n", "\n", "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "global_key = \"0801.3483_doc.pdf\" + str(uuid.uuid4())\nimg_url = {\n \"row_data\": {\n \"pdf_url\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n },\n \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "global_key = \"0801.3483_doc.pdf\" + str(uuid.uuid4())\n", - "img_url = {\n", - " \"row_data\": {\n", - " \"pdf_url\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", - " },\n", - " \"global_key\": global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", - "task = dataset.create_data_rows([img_url])\n", - "task.wait_till_done()\n", - "print(f\"Failed data rows: {task.failed_data_rows}\")\n", - "print(f\"Errors: {task.errors}\")\n", - "\n", - "if task.errors:\n", - " for error in task.errors:\n", - " if (\"Duplicate global key\" in error[\"message\"] and\n", - " dataset.row_count == 0):\n", - " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", - " print(f\"Deleting empty dataset: 
{dataset}\")\n", - " dataset.delete()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an Ontology for your project\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, 
name=\"named_entity\"),\n lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "## Setup the ontology and link the tools created above.\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " ),\n", - " lb.Classification(\n", - " 
class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " tools=[ # List of Tool objects\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", - " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", - " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.NER,\n", - " name=\"ner_with_checklist_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_sub_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"second_sub_radio_question\",\n", - " options=[\n", - " 
lb.Option(\"second_sub_radio_answer\")\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Document Annotation Import Demo\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Document,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Creating a labeling project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"PDF_annotation_demo\",\n media_type=lb.MediaType.Document)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Labelbox project\n", - "project = client.create_project(name=\"PDF_annotation_demo\",\n", - " media_type=lb.MediaType.Document)\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5. 
Create the annotation payload\n", @@ -807,357 +262,124 @@ "Labelbox support NDJSON only for this data type.\n", "\n", "The resulting label should have exactly the same content for annotations that are supported by both (with exception of the uuid strings that are generated)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### Step 5.1: First, we need to populate the text selections for Entity annotations\n", "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "To extract the generated text layer url we first need to export the data row" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "client.enable_experimental = True\ntask = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = json.loads(output.json_str)\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)", + "cell_type": "code", "outputs": [], - "source": [ - "client.enable_experimental = True\n", - "task = lb.DataRow.export(client=client, global_keys=[global_key])\n", - "task.wait_till_done()\n", - "stream = task.get_stream()\n", - "\n", - "text_layer = \"\"\n", - "for output in stream:\n", - " output_json = json.loads(output.json_str)\n", - " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n", - "print(text_layer)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n 
\"tokenIds\": list_tokens,\n \"page\": page\n }]\n })\n\n\n# Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n \"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. Kobayashi\",\n \"Organic charge transfer salts based on the donor\",\n \"the experimental investigations on this issue have not\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\ntext_selections_source = []\ntext_selections_target = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_annotations_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_annotation_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[2]:\n relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections 
for Python Annotation Types\n text_selection_entity_source = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n text_selections_source.append(text_selection_entity_source)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entity_source_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_source, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[3]:\n relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n text_selection_entity_target = lb_types.DocumentTextSelection(\n group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n text_selections_target.append(text_selection_entity_target)\n # build text selections forthe NDJson annotations\n update_text_selections(\n annotation=entity_target_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_target, # ids representing individual words from the group\n page=1,\n )", + "cell_type": "code", "outputs": [], - "source": [ - "# Helper method\n", - "def update_text_selections(annotation, group_id, list_tokens, page):\n", - " return annotation.update({\n", - " \"textSelections\": [{\n", - " \"groupId\": group_id,\n", - " \"tokenIds\": list_tokens,\n", - " \"page\": page\n", - " }]\n", - " })\n", - "\n", - "\n", - "# Fetch the content of the text layer\n", - "res = requests.get(text_layer)\n", - "\n", - "# Phrases that we want to annotation obtained from the text layer url\n", - "content_phrases = [\n", - " \"Metal-insulator (MI) transitions have been one of the\",\n", - " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", - " \"Organic charge transfer salts based on the donor\",\n", - " \"the experimental investigations on this issue have not\",\n", - "]\n", - "\n", - "# Parse the text layer\n", - "text_selections = []\n", - "text_selections_ner = []\n", - "text_selections_source = []\n", - "text_selections_target = []\n", - "\n", - "for obj in json.loads(res.text):\n", - " for group in obj[\"groups\"]:\n", - " if group[\"content\"] == content_phrases[0]:\n", - " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " document_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", - " text_selections.append(document_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=entities_annotations_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[1]:\n", - " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " ner_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", - " text_selections_ner.append(ner_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=ner_with_checklist_subclass_annotation_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens_2, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[2]:\n", - " relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " 
text_selection_entity_source = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n", - " text_selections_source.append(text_selection_entity_source)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=entity_source_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " relationship_source, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[3]:\n", - " relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " text_selection_entity_target = lb_types.DocumentTextSelection(\n", - " group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n", - " text_selections_target.append(text_selection_entity_target)\n", - " # build text selections forthe NDJson annotations\n", - " update_text_selections(\n", - " annotation=entity_target_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " relationship_target, # ids representing individual words from the group\n", - " page=1,\n", - " )" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Re-write the python annotations to include text selections (only required for python annotation types)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# re-write the entity annotation with text selections\nentities_annotation_document_entity = lb_types.DocumentEntity(\n name=\"named_entity\", textSelections=text_selections)\nentities_annotation = lb_types.ObjectAnnotation(\n name=\"named_entity\", value=entities_annotation_document_entity)\n\n# re-write the entity annotation + subclassification with text selections\nclassifications = [\n lb_types.ClassificationAnnotation(\n 
name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=ner_annotation_with_subclass,\n classifications=classifications,\n)\n\n# re-write the entity source and target annotations withe text selectios\nentity_source_doc = lb_types.DocumentEntity(\n name=\"named_entity\", text_selections=text_selections_source)\nentity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n value=entity_source_doc)\n\nentity_target_doc = lb_types.DocumentEntity(\n name=\"named_entity\", text_selections=text_selections_target)\nentity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n value=entity_target_doc)\n\n# re-write the entity relationship with the re-created entities\nentity_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=entity_source,\n target=entity_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# re-write the entity annotation with text selections\n", - "entities_annotation_document_entity = lb_types.DocumentEntity(\n", - " name=\"named_entity\", textSelections=text_selections)\n", - "entities_annotation = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\", value=entities_annotation_document_entity)\n", - "\n", - "# re-write the entity annotation + subclassification with text selections\n", - "classifications = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - "]\n", - "ner_annotation_with_subclass = 
lb_types.DocumentEntity(\n", - " name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " value=ner_annotation_with_subclass,\n", - " classifications=classifications,\n", - ")\n", - "\n", - "# re-write the entity source and target annotations withe text selectios\n", - "entity_source_doc = lb_types.DocumentEntity(\n", - " name=\"named_entity\", text_selections=text_selections_source)\n", - "entity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n", - " value=entity_source_doc)\n", - "\n", - "entity_target_doc = lb_types.DocumentEntity(\n", - " name=\"named_entity\", text_selections=text_selections_target)\n", - "entity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n", - " value=entity_target_doc)\n", - "\n", - "# re-write the entity relationship with the re-created entities\n", - "entity_relationship = lb_types.RelationshipAnnotation(\n", - " name=\"relationship\",\n", - " value=lb_types.Relationship(\n", - " source=entity_source,\n", - " target=entity_target,\n", - " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ),\n", - ")" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\nprint(f\"entities_annotation={entities_annotation}\")\nprint(\n f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\nprint(f\"entity_source_ndjson={entity_source_ndjson}\")\nprint(f\"entity_target_ndjson={entity_target_ndjson}\")\nprint(f\"entity_source={entity_source}\")\nprint(f\"entity_target={entity_target}\")", + "cell_type": "code", "outputs": [], - "source": [ - "# Final NDJSON and python annotations\n", - 
"print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n", - "print(f\"entities_annotation={entities_annotation}\")\n", - "print(\n", - " f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n", - ")\n", - "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\n", - "print(f\"entity_source_ndjson={entity_source_ndjson}\")\n", - "print(f\"entity_target_ndjson={entity_target_ndjson}\")\n", - "print(f\"entity_source={entity_source}\")\n", - "print(f\"entity_target={entity_target}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. Note that only a handful of python annotation types are supported for PDF documents." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "labels = []\n\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n radio_annotation,\n nested_radio_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n ner_with_checklist_subclass_annotation,\n entity_source,\n entity_target,\n entity_relationship, # Only supported for MAL imports\n bbox_source,\n bbox_target,\n bbox_relationship, # Only supported for MAL imports\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "labels = []\n", - "\n", - "labels.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " entities_annotation,\n", - " checklist_annotation,\n", - " nested_checklist_annotation,\n", - " text_annotation,\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " bbox_annotation,\n", - " 
bbox_with_radio_subclass_annotation,\n", - " ner_with_checklist_subclass_annotation,\n", - " entity_source,\n", - " entity_target,\n", - " entity_relationship, # Only supported for MAL imports\n", - " bbox_source,\n", - " bbox_target,\n", - " bbox_relationship, # Only supported for MAL imports\n", - " ],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### NDJson annotations\n", "Here we create the complete labels ndjson payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created above." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\nfor annot in [\n entities_annotations_ndjson,\n checklist_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n text_annotation_ndjson,\n radio_annotation_ndjson,\n nested_radio_annotation_ndjson,\n bbox_annotation_ndjson,\n bbox_with_radio_subclass_annotation_ndjson,\n ner_with_checklist_subclass_annotation_ndjson,\n entity_source_ndjson,\n entity_target_ndjson,\n ner_relationship_annotation_ndjson, # Only supported for MAL imports\n bbox_source_ndjson,\n bbox_target_ndjson,\n bbox_relationship_annotation_ndjson, # Only supported for MAL imports\n]:\n annot.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_ndjson.append(annot)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "for annot in [\n", - " entities_annotations_ndjson,\n", - " checklist_annotation_ndjson,\n", - " nested_checklist_annotation_ndjson,\n", - " text_annotation_ndjson,\n", - " radio_annotation_ndjson,\n", - " nested_radio_annotation_ndjson,\n", - " bbox_annotation_ndjson,\n", - " bbox_with_radio_subclass_annotation_ndjson,\n", - " ner_with_checklist_subclass_annotation_ndjson,\n", - " entity_source_ndjson,\n", - " entity_target_ndjson,\n", - " ner_relationship_annotation_ndjson, # Only supported for 
MAL imports\n", - " bbox_source_ndjson,\n", - " bbox_target_ndjson,\n", - " bbox_relationship_annotation_ndjson, # Only supported for MAL imports\n", - "]:\n", - " annot.update({\n", - " \"dataRow\": {\n", - " \"globalKey\": global_key\n", - " },\n", - " })\n", - " label_ndjson.append(annot)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Import the annotation payload\n", "For the purpose of this tutorial only import one of the annotations payloads at the time (NDJSON or Python annotation types)." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Option A: Upload to a labeling project as pre-labels (MAL)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n", - " predictions=labels,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Option B: Upload to a labeling project using ground truth" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Uncomment this code when excluding relationships from 
label import\n## Relationships are not currently supported for label import\n\n# upload_job = lb.LabelImport.create_from_objects(\n# client = client,\n# project_id = project.uid,\n# name=\"label_import_job\"+str(uuid.uuid4()),\n# labels=labels) ## Remove unsupported relationships from the labels list\n\n# print(\"Errors:\", upload_job.errors)\n# print(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Uncomment this code when excluding relationships from label import\n", - "## Relationships are not currently supported for label import\n", - "\n", - "# upload_job = lb.LabelImport.create_from_objects(\n", - "# client = client,\n", - "# project_id = project.uid,\n", - "# name=\"label_import_job\"+str(uuid.uuid4()),\n", - "# labels=labels) ## Remove unsupported relationships from the labels list\n", - "\n", - "# print(\"Errors:\", upload_job.errors)\n", - "# print(\"Status of uploads: \", upload_job.statuses)" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + ] +} \ No newline at end of file diff --git a/examples/annotation_import/text.ipynb b/examples/annotation_import/text.ipynb index d71bad3f7..c682be2ed 100644 --- a/examples/annotation_import/text.ipynb +++ b/examples/annotation_import/text.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Text Annotation Import\n", @@ -53,299 +55,125 @@ "* Label Import - used to provide ground truth labels. 
These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", "\n", "For information on what types of annotations are supported per data type, refer to the Import text annotations [documentation](https://docs.labelbox.com/reference/import-text-annotations)." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Notes:\n", " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly.\n", " * You may need to refresh your browser in order to see the results of the import job." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Setup\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid\nimport json", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "import uuid\n", - "import json" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations 
for text" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Supported Python annotation types and NDJSON" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Entities ##########\n\n# Python annotation\nnamed_entity = lb_types.TextEntity(start=10, end=20)\nnamed_entitity_annotation = lb_types.ObjectAnnotation(value=named_entity,\n name=\"named_entity\")\n\n# NDJSON\nentities_ndjson = {\n \"name\": \"named_entity\",\n \"location\": {\n \"start\": 67,\n \"end\": 128\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Entities ##########\n", - "\n", - "# Python annotation\n", - "named_entity = lb_types.TextEntity(start=10, end=20)\n", - "named_entitity_annotation = lb_types.ObjectAnnotation(value=named_entity,\n", - " name=\"named_entity\")\n", - "\n", - "# NDJSON\n", - "entities_ndjson = {\n", - " \"name\": \"named_entity\",\n", - " \"location\": {\n", - " \"start\": 67,\n", - " \"end\": 128\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classification - Radio (single choice ) ##########\n\n# Python annotation\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Classification - Radio (single choice ) ##########\n", - "\n", - "# Python annotation\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - ")\n", - "\n", - "# NDJSON\n", - 
"radio_annotation_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\"\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classification - Radio and Checklist (with subclassifications) ##########\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n# NDJSON\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Classification - Radio and Checklist (with subclassifications) ##########\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " 
name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "# NDJSON\n", - "nested_radio_annotation_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - "}\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "nested_checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " },\n", - " }],\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classification - Checklist (Multi-choice) ##########\n\n# Python annotation\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n 
value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n {\n \"name\": \"third_checklist_answer\"\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Classification - Checklist (Multi-choice) ##########\n", - "\n", - "# Python annotation\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"checklist_question\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"third_checklist_answer\"\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classification Free-Form text ##########\n\n# Python annotation\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\n# NDJSON\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Classification Free-Form text ##########\n", - "\n", - "# Python annotation\n", - "text_annotation = 
lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", - "\n", - "# NDJSON\n", - "text_annotation_ndjson = {\n", - " \"name\": \"free_text\",\n", - " \"answer\": \"sample text\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annoations - putting it all together " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# You can now include ohter fields like attachments, media type and metadata in the data row creation step: https://docs.labelbox.com/reference/text-file\nglobal_key = \"lorem-ipsum.txt\" + str(uuid.uuid4())\ntext_asset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt\",\n \"global_key\":\n global_key,\n \"media_type\":\n \"TEXT\",\n \"attachments\": [{\n \"type\":\n \"TEXT_URL\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n }],\n}\n\ndataset = client.create_dataset(\n name=\"text_annotation_import_demo_dataset\",\n iam_integration=\n None, # Removing this argument will default to the organziation's default iam integration\n)\ntask = dataset.create_data_rows([text_asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "# You can now include ohter fields like attachments, media type and metadata in the data row creation step: https://docs.labelbox.com/reference/text-file\n", - "global_key = \"lorem-ipsum.txt\" + str(uuid.uuid4())\n", - "text_asset = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt\",\n", - " \"global_key\":\n", - " global_key,\n", - " 
\"media_type\":\n", - " \"TEXT\",\n", - " \"attachments\": [{\n", - " \"type\":\n", - " \"TEXT_URL\",\n", - " \"value\":\n", - " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n", - " }],\n", - "}\n", - "\n", - "dataset = client.create_dataset(\n", - " name=\"text_annotation_import_demo_dataset\",\n", - " iam_integration=\n", - " None, # Removing this argument will default to the organziation's default iam integration\n", - ")\n", - "task = dataset.create_data_rows([text_asset])\n", - "task.wait_till_done()\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an ontology\n", @@ -354,129 +182,46 @@ "For example, when we create the checklist annotation above, we provided the `name` as `checklist_question`. Now, when we setup our ontology, we must ensure that the name of my classification tool is also `checklist_question`. 
The same alignment must hold true for the other tools and classifications we create in our ontology.\n", "\n", "[Documentation for reference ](https://docs.labelbox.com/reference/import-text-annotations)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[lb.Option(value=\"first_radio_answer\")],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n lb.Option(value=\"third_checklist_answer\"),\n ],\n ),\n lb.Classification( # Text classification given the name \"text\"\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n ],\n)\n\nontology = client.create_ontology(\"Ontology Text Annotations\",\n ontology_builder.asdict())", + "cell_type": "code", "outputs": [], - "source": [ - "## Setup the ontology and 
link the tools created above.\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " options=[lb.Option(value=\"first_radio_answer\")],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", - " ),\n", - " ],\n", - " ),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " lb.Option(value=\"third_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification( # Text classification given the name \"text\"\n", - " class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\"),\n", - " ],\n", - " tools=[ # List of Tool objects\n", - " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\"Ontology Text Annotations\",\n", - " ontology_builder.asdict())" - ] + "execution_count": null }, { - "cell_type": "markdown", 
"metadata": {}, "source": [ "### Step 3: Create a labeling project \n", "Connect the ontology to the labeling project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"Text Annotation Import Demo\",\n media_type=lb.MediaType.Text)\n\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n", - "# Queue mode will be deprecated once dataset mode is deprecated\n", - "\n", - "project = client.create_project(name=\"Text Annotation Import Demo\",\n", - " media_type=lb.MediaType.Text)\n", - "\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Setup Batches and Ontology\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-text-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", + "cell_type": "code", "outputs": [], - "source": [ - "# Setup Batches and Ontology\n", - "\n", - "# Create a batch to send to your MAL project\n", - "batch = project.create_batch(\n", - " \"first-batch-text-demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " 
priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "print(\"Batch: \", batch)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5: Create the annotations payload\n", @@ -484,148 +229,88 @@ "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below. If you are using Python Annotation types, compose your annotations into Labels attached to the data rows." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotations" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label\nlabels = []\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n named_entitity_annotation,\n radio_annotation,\n checklist_annotation,\n text_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label\n", - "labels = []\n", - "labels.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " named_entitity_annotation,\n", - " radio_annotation,\n", - " checklist_annotation,\n", - " text_annotation,\n", - " nested_checklist_annotation,\n", - " nested_radio_annotation,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "#### NDJSON annotations" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\nfor annotations in [\n entities_ndjson,\n radio_annotation_ndjson,\n checklist_annotation_ndjson,\n text_annotation_ndjson,\n nested_radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n]:\n 
annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "for annotations in [\n", - " entities_ndjson,\n", - " radio_annotation_ndjson,\n", - " checklist_annotation_ndjson,\n", - " text_annotation_ndjson,\n", - " nested_radio_annotation_ndjson,\n", - " nested_checklist_annotation_ndjson,\n", - "]:\n", - " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson.append(annotations)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or ground truth\n", "For the purpose of this tutorial only import one of the annotations payloads at the time (NDJSON or Python Annotation types). \n", "\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Model-Assisted Labeling (MAL)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload MAL label for this data row in project\nupload_job_mal = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"mal_import_job\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job_mal.wait_until_done()\nprint(\"Errors:\", upload_job_mal.errors)\nprint(\"Status of uploads: \", upload_job_mal.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload MAL label for this data row in project\n", - "upload_job_mal = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"mal_import_job\" + str(uuid.uuid4()),\n", - " predictions=labels,\n", - ")\n", - "\n", - "upload_job_mal.wait_until_done()\n", - "print(\"Errors:\", upload_job_mal.errors)\n", - "print(\"Status of uploads: \", upload_job_mal.statuses)" - ] + "execution_count": null }, { - 
"cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload label for this data row in project\nupload_job_label_import = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_label_import.wait_until_done()\nprint(\"Errors:\", upload_job_label_import.errors)\nprint(\"Status of uploads: \", upload_job_label_import.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload label for this data row in project\n", - "upload_job_label_import = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"label_import_job\" + str(uuid.uuid4()),\n", - " labels=labels,\n", - ")\n", - "\n", - "upload_job_label_import.wait_until_done()\n", - "print(\"Errors:\", upload_job_label_import.errors)\n", - "print(\"Status of uploads: \", upload_job_label_import.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/examples/annotation_import/tiled.ipynb b/examples/annotation_import/tiled.ipynb index aa3885bfc..a5c0ea969 100644 --- a/examples/annotation_import/tiled.ipynb +++ b/examples/annotation_import/tiled.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, 
"source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Tiled Imagery Annotation Import\n", @@ -50,10 +52,10 @@ "\n", "For information on what types of annotations are supported per data type, refer to this documentation:\n", " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Notes:\n", @@ -61,882 +63,283 @@ " * If you are importing more than 1,000 annotations at a time, consider submitting separate jobs, as they can take longer than other annotation types to import.\n", " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly.\n", " * You may need to refresh your browser in order to see the results of the import job." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import uuid\nimport numpy as np\nimport cv2\nimport labelbox as lb\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "import numpy as np\n", - "import cv2\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for tiled imagery" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Supported Python annotation types and NDJSON " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Point #######\n\n# Python Annotation\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point_geo\",\n value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n)\n\n# NDJSON\npoint_annotation_ndjson = {\n \"name\": \"point_geo\",\n \"point\": {\n \"x\": -99.20647859573366,\n \"y\": 19.40018029091072\n },\n}", + "cell_type": "code", "outputs": 
[], - "source": [ - "####### Point #######\n", - "\n", - "# Python Annotation\n", - "point_annotation = lb_types.ObjectAnnotation(\n", - " name=\"point_geo\",\n", - " value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n", - ")\n", - "\n", - "# NDJSON\n", - "point_annotation_ndjson = {\n", - " \"name\": \"point_geo\",\n", - " \"point\": {\n", - " \"x\": -99.20647859573366,\n", - " \"y\": 19.40018029091072\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Polyline #######\n# Coordinates\ncoords = [\n [-99.20842051506044, 19.40032196622975],\n [-99.20809864997865, 19.39758963475322],\n [-99.20758366584778, 19.39776167179227],\n [-99.20728325843811, 19.3973265189299],\n]\n\nline_points = []\nline_points_ndjson = []\n\nfor sub in coords:\n line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python Annotation\npolyline_annotation = lb_types.ObjectAnnotation(\n name=\"polyline_geo\",\n value=lb_types.Line(points=line_points),\n)\n\n# NDJSON\npolyline_annotation_ndjson = {\n \"name\": \"polyline_geo\",\n \"line\": line_points_ndjson,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Polyline #######\n", - "# Coordinates\n", - "coords = [\n", - " [-99.20842051506044, 19.40032196622975],\n", - " [-99.20809864997865, 19.39758963475322],\n", - " [-99.20758366584778, 19.39776167179227],\n", - " [-99.20728325843811, 19.3973265189299],\n", - "]\n", - "\n", - "line_points = []\n", - "line_points_ndjson = []\n", - "\n", - "for sub in coords:\n", - " line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", - " line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", - "\n", - "# Python Annotation\n", - "polyline_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polyline_geo\",\n", - " value=lb_types.Line(points=line_points),\n", - ")\n", - "\n", - "# NDJSON\n", - 
"polyline_annotation_ndjson = {\n", - " \"name\": \"polyline_geo\",\n", - " \"line\": line_points_ndjson,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Polygon #######\n# Coordinates in the desired EPSG coordinate system\ncoords_polygon = [\n [-99.21042680740356, 19.40036244486966],\n [-99.2104160785675, 19.40017017124035],\n [-99.2103409767151, 19.400008256428897],\n [-99.21014785766603, 19.400008256428897],\n [-99.21019077301027, 19.39983622176518],\n [-99.21022295951845, 19.399674306621385],\n [-99.21029806137086, 19.39951239131646],\n [-99.2102873325348, 19.399340356128437],\n [-99.21025514602663, 19.399117722085677],\n [-99.21024441719057, 19.39892544698541],\n [-99.2102336883545, 19.39874329141769],\n [-99.21021223068239, 19.398561135646027],\n [-99.21018004417421, 19.398399219233365],\n [-99.21011567115785, 19.39822718286836],\n [-99.20992255210878, 19.398136104719125],\n [-99.20974016189577, 19.398085505725305],\n [-99.20957922935487, 19.398004547302467],\n [-99.20939683914186, 19.39792358883935],\n [-99.20918226242067, 19.39786286996558],\n [-99.20899987220764, 19.397822390703805],\n [-99.20891404151918, 19.397994427496787],\n [-99.20890331268312, 19.398176583902874],\n [-99.20889258384706, 19.398368859888045],\n [-99.20889258384706, 19.398540896103246],\n [-99.20890331268312, 19.39872305189756],\n [-99.20889258384706, 19.39890520748796],\n [-99.20889258384706, 19.39907724313608],\n [-99.20889258384706, 19.399259398329956],\n [-99.20890331268312, 19.399431433603585],\n [-99.20890331268312, 19.39961358840092],\n [-99.20890331268312, 19.399785623300048],\n [-99.20897841453552, 19.399937418648214],\n [-99.20919299125673, 19.399937418648214],\n [-99.2093861103058, 19.39991717927664],\n [-99.20956850051881, 19.39996777770086],\n [-99.20961141586305, 19.40013981222548],\n [-99.20963287353517, 19.40032196622975],\n [-99.20978307724, 19.4004130431554],\n [-99.20996546745302, 
19.40039280384301],\n [-99.21019077301027, 19.400372564528084],\n [-99.21042680740356, 19.40036244486966],\n]\n\npolygon_points = []\npolygon_points_ndjson = []\n\nfor sub in coords_polygon:\n polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python Annotation\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon_geo\",\n value=lb_types.Polygon(points=polygon_points),\n)\n\n# NDJSON\npolygon_annotation_ndjson = {\n \"name\": \"polygon_geo\",\n \"polygon\": polygon_points_ndjson,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Polygon #######\n", - "# Coordinates in the desired EPSG coordinate system\n", - "coords_polygon = [\n", - " [-99.21042680740356, 19.40036244486966],\n", - " [-99.2104160785675, 19.40017017124035],\n", - " [-99.2103409767151, 19.400008256428897],\n", - " [-99.21014785766603, 19.400008256428897],\n", - " [-99.21019077301027, 19.39983622176518],\n", - " [-99.21022295951845, 19.399674306621385],\n", - " [-99.21029806137086, 19.39951239131646],\n", - " [-99.2102873325348, 19.399340356128437],\n", - " [-99.21025514602663, 19.399117722085677],\n", - " [-99.21024441719057, 19.39892544698541],\n", - " [-99.2102336883545, 19.39874329141769],\n", - " [-99.21021223068239, 19.398561135646027],\n", - " [-99.21018004417421, 19.398399219233365],\n", - " [-99.21011567115785, 19.39822718286836],\n", - " [-99.20992255210878, 19.398136104719125],\n", - " [-99.20974016189577, 19.398085505725305],\n", - " [-99.20957922935487, 19.398004547302467],\n", - " [-99.20939683914186, 19.39792358883935],\n", - " [-99.20918226242067, 19.39786286996558],\n", - " [-99.20899987220764, 19.397822390703805],\n", - " [-99.20891404151918, 19.397994427496787],\n", - " [-99.20890331268312, 19.398176583902874],\n", - " [-99.20889258384706, 19.398368859888045],\n", - " [-99.20889258384706, 19.398540896103246],\n", - " [-99.20890331268312, 19.39872305189756],\n", - " 
[-99.20889258384706, 19.39890520748796],\n", - " [-99.20889258384706, 19.39907724313608],\n", - " [-99.20889258384706, 19.399259398329956],\n", - " [-99.20890331268312, 19.399431433603585],\n", - " [-99.20890331268312, 19.39961358840092],\n", - " [-99.20890331268312, 19.399785623300048],\n", - " [-99.20897841453552, 19.399937418648214],\n", - " [-99.20919299125673, 19.399937418648214],\n", - " [-99.2093861103058, 19.39991717927664],\n", - " [-99.20956850051881, 19.39996777770086],\n", - " [-99.20961141586305, 19.40013981222548],\n", - " [-99.20963287353517, 19.40032196622975],\n", - " [-99.20978307724, 19.4004130431554],\n", - " [-99.20996546745302, 19.40039280384301],\n", - " [-99.21019077301027, 19.400372564528084],\n", - " [-99.21042680740356, 19.40036244486966],\n", - "]\n", - "\n", - "polygon_points = []\n", - "polygon_points_ndjson = []\n", - "\n", - "for sub in coords_polygon:\n", - " polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", - " polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", - "\n", - "# Python Annotation\n", - "polygon_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polygon_geo\",\n", - " value=lb_types.Polygon(points=polygon_points),\n", - ")\n", - "\n", - "# NDJSON\n", - "polygon_annotation_ndjson = {\n", - " \"name\": \"polygon_geo\",\n", - " \"polygon\": polygon_points_ndjson,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Bounding Box #######\ncoord_object = {\n \"coordinates\": [[\n [-99.20746564865112, 19.39799442829336],\n [-99.20746564865112, 19.39925939999194],\n [-99.20568466186523, 19.39925939999194],\n [-99.20568466186523, 19.39799442829336],\n [-99.20746564865112, 19.39799442829336],\n ]]\n}\n\nbbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\nbbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n\n# Python Annotation\nbbox_annotation = lb_types.ObjectAnnotation(\n 
name=\"bbox_geo\",\n value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n)\n\n# NDJSON\nbbox_annotation_ndjson = {\n \"name\": \"bbox_geo\",\n \"bbox\": {\n \"top\":\n coord_object[\"coordinates\"][0][1][1],\n \"left\":\n coord_object[\"coordinates\"][0][1][0],\n \"height\":\n coord_object[\"coordinates\"][0][3][1] -\n coord_object[\"coordinates\"][0][1][1],\n \"width\":\n coord_object[\"coordinates\"][0][3][0] -\n coord_object[\"coordinates\"][0][1][0],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Bounding Box #######\n", - "coord_object = {\n", - " \"coordinates\": [[\n", - " [-99.20746564865112, 19.39799442829336],\n", - " [-99.20746564865112, 19.39925939999194],\n", - " [-99.20568466186523, 19.39925939999194],\n", - " [-99.20568466186523, 19.39799442829336],\n", - " [-99.20746564865112, 19.39799442829336],\n", - " ]]\n", - "}\n", - "\n", - "bbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\n", - "bbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n", - "\n", - "# Python Annotation\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bbox_geo\",\n", - " value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n", - ")\n", - "\n", - "# NDJSON\n", - "bbox_annotation_ndjson = {\n", - " \"name\": \"bbox_geo\",\n", - " \"bbox\": {\n", - " \"top\":\n", - " coord_object[\"coordinates\"][0][1][1],\n", - " \"left\":\n", - " coord_object[\"coordinates\"][0][1][0],\n", - " \"height\":\n", - " coord_object[\"coordinates\"][0][3][1] -\n", - " coord_object[\"coordinates\"][0][1][1],\n", - " \"width\":\n", - " coord_object[\"coordinates\"][0][3][0] -\n", - " coord_object[\"coordinates\"][0][1][0],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Classification - radio (single choice) #######\n\n# Python Annotation\nradio_annotation = 
lb_types.ClassificationAnnotation(\n name=\"radio_question_geo\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": \"radio_question_geo\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Classification - radio (single choice) #######\n", - "\n", - "# Python Annotation\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question_geo\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - ")\n", - "\n", - "# NDJSON\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"radio_question_geo\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\"\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Classification - Checklist (multi-choice) #######\n\ncoord_object_checklist = {\n \"coordinates\": [[\n [-99.210266, 19.39540372195134],\n [-99.210266, 19.396901],\n [-99.20621067903966, 19.396901],\n [-99.20621067903966, 19.39540372195134],\n [-99.210266, 19.39540372195134],\n ]]\n}\n\n# Python Annotation\nbbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_checklist_geo\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_name\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n )\n ],\n)\n\n# NDJSON\nbbox_with_checklist_subclass_ndjson = {\n \"name\": \"bbox_checklist_geo\",\n \"classifications\": [{\n \"name\": \"checklist_class_name\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\"\n }],\n }],\n \"bbox\": {\n \"top\":\n 
coord_object_checklist[\"coordinates\"][0][1][1],\n \"left\":\n coord_object_checklist[\"coordinates\"][0][1][0],\n \"height\":\n coord_object_checklist[\"coordinates\"][0][3][1] -\n coord_object_checklist[\"coordinates\"][0][1][1],\n \"width\":\n coord_object_checklist[\"coordinates\"][0][3][0] -\n coord_object_checklist[\"coordinates\"][0][1][0],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Classification - Checklist (multi-choice) #######\n", - "\n", - "coord_object_checklist = {\n", - " \"coordinates\": [[\n", - " [-99.210266, 19.39540372195134],\n", - " [-99.210266, 19.396901],\n", - " [-99.20621067903966, 19.396901],\n", - " [-99.20621067903966, 19.39540372195134],\n", - " [-99.210266, 19.39540372195134],\n", - " ]]\n", - "}\n", - "\n", - "# Python Annotation\n", - "bbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n", - " name=\"bbox_checklist_geo\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n", - " end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"checklist_class_name\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "# NDJSON\n", - "bbox_with_checklist_subclass_ndjson = {\n", - " \"name\": \"bbox_checklist_geo\",\n", - " \"classifications\": [{\n", - " \"name\": \"checklist_class_name\",\n", - " \"answer\": [{\n", - " \"name\": \"first_checklist_answer\"\n", - " }],\n", - " }],\n", - " \"bbox\": {\n", - " \"top\":\n", - " coord_object_checklist[\"coordinates\"][0][1][1],\n", - " \"left\":\n", - " coord_object_checklist[\"coordinates\"][0][1][0],\n", - " \"height\":\n", - " coord_object_checklist[\"coordinates\"][0][3][1] -\n", - " coord_object_checklist[\"coordinates\"][0][1][1],\n", - " \"width\":\n", - " 
coord_object_checklist[\"coordinates\"][0][3][0] -\n", - " coord_object_checklist[\"coordinates\"][0][1][0],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Classification free form text with bbox #######\n\ncoord_object_text = {\n \"coordinates\": [[\n [-99.21019613742828, 19.397447957052933],\n [-99.21019613742828, 19.39772119262215],\n [-99.20986354351044, 19.39772119262215],\n [-99.20986354351044, 19.397447957052933],\n [-99.21019613742828, 19.397447957052933],\n ]]\n}\n# Python Annotation\nbbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_text_geo\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.21019613742828,\n y=19.397447957052933), # Top left\n end=lb_types.Point(x=-99.20986354351044,\n y=19.39772119262215), # Bottom right\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"free_text_geo\", value=lb_types.Text(answer=\"sample text\"))\n ],\n)\n\n# NDJSON\nbbox_with_free_text_subclass_ndjson = {\n \"name\": \"bbox_text_geo\",\n \"classifications\": [{\n \"name\": \"free_text_geo\",\n \"answer\": \"sample text\"\n }],\n \"bbox\": {\n \"top\":\n coord_object_text[\"coordinates\"][0][1][1],\n \"left\":\n coord_object_text[\"coordinates\"][0][1][0],\n \"height\":\n coord_object_text[\"coordinates\"][0][3][1] -\n coord_object_text[\"coordinates\"][0][1][1],\n \"width\":\n coord_object_text[\"coordinates\"][0][3][0] -\n coord_object_text[\"coordinates\"][0][1][0],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Classification free form text with bbox #######\n", - "\n", - "coord_object_text = {\n", - " \"coordinates\": [[\n", - " [-99.21019613742828, 19.397447957052933],\n", - " [-99.21019613742828, 19.39772119262215],\n", - " [-99.20986354351044, 19.39772119262215],\n", - " [-99.20986354351044, 19.397447957052933],\n", - " [-99.21019613742828, 19.397447957052933],\n", - " ]]\n", - "}\n", - "# 
Python Annotation\n", - "bbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n", - " name=\"bbox_text_geo\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=-99.21019613742828,\n", - " y=19.397447957052933), # Top left\n", - " end=lb_types.Point(x=-99.20986354351044,\n", - " y=19.39772119262215), # Bottom right\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"free_text_geo\", value=lb_types.Text(answer=\"sample text\"))\n", - " ],\n", - ")\n", - "\n", - "# NDJSON\n", - "bbox_with_free_text_subclass_ndjson = {\n", - " \"name\": \"bbox_text_geo\",\n", - " \"classifications\": [{\n", - " \"name\": \"free_text_geo\",\n", - " \"answer\": \"sample text\"\n", - " }],\n", - " \"bbox\": {\n", - " \"top\":\n", - " coord_object_text[\"coordinates\"][0][1][1],\n", - " \"left\":\n", - " coord_object_text[\"coordinates\"][0][1][0],\n", - " \"height\":\n", - " coord_object_text[\"coordinates\"][0][3][1] -\n", - " coord_object_text[\"coordinates\"][0][1][1],\n", - " \"width\":\n", - " coord_object_text[\"coordinates\"][0][3][0] -\n", - " coord_object_text[\"coordinates\"][0][1][0],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Classification - Checklist (multi-choice) #######\n\n# Python Annotation\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question_geo\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question_geo\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n {\n \"name\": \"third_checklist_answer\"\n },\n ],\n}", + "cell_type": "code", "outputs": 
[], - "source": [ - "####### Classification - Checklist (multi-choice) #######\n", - "\n", - "# Python Annotation\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question_geo\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"checklist_question_geo\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"third_checklist_answer\"\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classification - Radio and Checklist (with subclassifications) ##########\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n# NDJSON\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n 
name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Classification - Radio and Checklist (with subclassifications) ##########\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "# NDJSON\n", - "nested_radio_annotation_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - "}\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - 
"nested_checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " },\n", - " }],\n", - " }],\n", - "}" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Upload Annotations - putting it all together\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "top_left_bound = lb_types.Point(x=-99.21052827588443, y=19.400498983095076)\nbottom_right_bound = lb_types.Point(x=-99.20534818927473, y=19.39533555271248)\n\nepsg = lb_types.EPSG.EPSG4326\nbounds = lb_types.TiledBounds(epsg=epsg,\n bounds=[top_left_bound, bottom_right_bound])\nglobal_key = \"mexico_city\" + str(uuid.uuid4())\n\ntile_layer = lb_types.TileLayer(\n url=\n \"https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png\"\n)\n\ntiled_image_data = lb_types.TiledImageData(tile_layer=tile_layer,\n tile_bounds=bounds,\n zoom_levels=[17, 23])\n\nasset = {\n \"row_data\": tiled_image_data.asdict(),\n \"global_key\": global_key,\n \"media_type\": \"TMS_GEO\",\n}\n\ndataset = client.create_dataset(name=\"geo_demo_dataset\")\ntask = dataset.create_data_rows([asset])\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "top_left_bound = lb_types.Point(x=-99.21052827588443, y=19.400498983095076)\n", - "bottom_right_bound = lb_types.Point(x=-99.20534818927473, y=19.39533555271248)\n", - "\n", - "epsg = lb_types.EPSG.EPSG4326\n", - "bounds = lb_types.TiledBounds(epsg=epsg,\n", - " 
bounds=[top_left_bound, bottom_right_bound])\n", - "global_key = \"mexico_city\" + str(uuid.uuid4())\n", - "\n", - "tile_layer = lb_types.TileLayer(\n", - " url=\n", - " \"https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png\"\n", - ")\n", - "\n", - "tiled_image_data = lb_types.TiledImageData(tile_layer=tile_layer,\n", - " tile_bounds=bounds,\n", - " zoom_levels=[17, 23])\n", - "\n", - "asset = {\n", - " \"row_data\": tiled_image_data.asdict(),\n", - " \"global_key\": global_key,\n", - " \"media_type\": \"TMS_GEO\",\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"geo_demo_dataset\")\n", - "task = dataset.create_data_rows([asset])\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an ontology\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_geo\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline_geo\"),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo\"),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo_2\"),\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_geo\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_checklist_geo\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class_name\",\n options=[lb.Option(value=\"first_checklist_answer\")],\n ),\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_text_geo\",\n classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text_geo\"),\n ],\n ),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question_geo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n lb.Option(value=\"third_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question_geo\",\n options=[lb.Option(value=\"first_radio_answer\")],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n 
options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Ontology Geospatial Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Geospatial_Tile,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(\n", - " tools=[\n", - " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_geo\"),\n", - " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline_geo\"),\n", - " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo\"),\n", - " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo_2\"),\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_geo\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_checklist_geo\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_class_name\",\n", - " options=[lb.Option(value=\"first_checklist_answer\")],\n", - " ),\n", - " ],\n", - " ),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_text_geo\",\n", - " classifications=[\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text_geo\"),\n", - " ],\n", - " ),\n", - " ],\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question_geo\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " lb.Option(value=\"third_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question_geo\",\n", - " options=[lb.Option(value=\"first_radio_answer\")],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_radio_answer\",\n", - " options=[\n", - " 
lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", - " ),\n", - " ],\n", - " ),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Ontology Geospatial Annotations\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Geospatial_Tile,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Create a labeling project\n", "Connect the ontology to the labeling project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n# Queue mode will be deprecated once dataset mode is deprecated\n\nproject = client.create_project(name=\"Geospatial Project Demo\",\n media_type=lb.MediaType.Geospatial_Tile)\n\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n", - "# Queue mode will be deprecated once dataset mode is deprecated\n", - "\n", - "project = client.create_project(name=\"Geospatial Project Demo\",\n", - " media_type=lb.MediaType.Geospatial_Tile)\n", - "\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: 
Send a batch of data rows to the project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Setup Batches and Ontology\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-geo-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", + "cell_type": "code", "outputs": [], - "source": [ - "# Setup Batches and Ontology\n", - "\n", - "# Create a batch to send to your MAL project\n", - "batch = project.create_batch(\n", - " \"first-batch-geo-demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "print(\"Batch: \", batch)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5: Create the annotations payload \n", "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below. \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotations\n", "Here we create the complete label ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created on ***Supported Python annotation types and NDJSON*** section." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "## Lets create another polygon annotation with python annotation tools that draws the image using cv2 python libraries\n\nhsv = cv2.cvtColor(tiled_image_data.value, cv2.COLOR_RGB2HSV)\nmask = cv2.inRange(hsv, (25, 50, 25), (100, 150, 255))\nkernel = np.ones((15, 20), np.uint8)\nmask = cv2.erode(mask, kernel)\nmask = cv2.dilate(mask, kernel)\nmask_annotation = lb_types.MaskData.from_2D_arr(mask)\nmask_data = lb_types.Mask(mask=mask_annotation, color=[255, 255, 255])\nh, w, _ = tiled_image_data.value.shape\npixel_bounds = lb_types.TiledBounds(\n epsg=lb_types.EPSG.SIMPLEPIXEL,\n bounds=[lb_types.Point(x=0, y=0),\n lb_types.Point(x=w, y=h)],\n)\ntransformer = lb_types.EPSGTransformer.create_pixel_to_geo_transformer(\n src_epsg=pixel_bounds.epsg,\n pixel_bounds=pixel_bounds,\n geo_bounds=tiled_image_data.tile_bounds,\n zoom=20,\n)\npixel_polygons = mask_data.shapely.simplify(3)\nlist_of_polygons = [\n transformer(lb_types.Polygon.from_shapely(p)) for p in pixel_polygons.geoms\n]\npolygon_annotation_two = lb_types.ObjectAnnotation(value=list_of_polygons[0],\n name=\"polygon_geo_2\")", + "cell_type": "code", "outputs": [], - "source": [ - "## Lets create another polygon annotation with python annotation tools that draws the image using cv2 python libraries\n", - "\n", - "hsv = cv2.cvtColor(tiled_image_data.value, cv2.COLOR_RGB2HSV)\n", - "mask = cv2.inRange(hsv, (25, 50, 25), (100, 150, 255))\n", - "kernel = np.ones((15, 20), np.uint8)\n", - "mask = cv2.erode(mask, kernel)\n", - "mask = cv2.dilate(mask, kernel)\n", - "mask_annotation = lb_types.MaskData.from_2D_arr(mask)\n", - "mask_data = lb_types.Mask(mask=mask_annotation, color=[255, 255, 255])\n", - "h, w, _ = tiled_image_data.value.shape\n", - "pixel_bounds = lb_types.TiledBounds(\n", - " epsg=lb_types.EPSG.SIMPLEPIXEL,\n", - " bounds=[lb_types.Point(x=0, y=0),\n", - " lb_types.Point(x=w, y=h)],\n", 
- ")\n", - "transformer = lb_types.EPSGTransformer.create_pixel_to_geo_transformer(\n", - " src_epsg=pixel_bounds.epsg,\n", - " pixel_bounds=pixel_bounds,\n", - " geo_bounds=tiled_image_data.tile_bounds,\n", - " zoom=20,\n", - ")\n", - "pixel_polygons = mask_data.shapely.simplify(3)\n", - "list_of_polygons = [\n", - " transformer(lb_types.Polygon.from_shapely(p)) for p in pixel_polygons.geoms\n", - "]\n", - "polygon_annotation_two = lb_types.ObjectAnnotation(value=list_of_polygons[0],\n", - " name=\"polygon_geo_2\")" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "labels = []\nlabels.append(\n lb_types.Label(\n data={\n \"global_key\": global_key,\n \"tile_layer\": tile_layer,\n \"tile_bounds\": bounds,\n \"zoom_levels\": [12, 20],\n },\n annotations=[\n point_annotation,\n polyline_annotation,\n polygon_annotation,\n bbox_annotation,\n radio_annotation,\n bbox_with_checklist_subclass,\n bbox_with_free_text_subclass,\n checklist_annotation,\n polygon_annotation_two,\n nested_checklist_annotation,\n nested_radio_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "labels = []\n", - "labels.append(\n", - " lb_types.Label(\n", - " data={\n", - " \"global_key\": global_key,\n", - " \"tile_layer\": tile_layer,\n", - " \"tile_bounds\": bounds,\n", - " \"zoom_levels\": [12, 20],\n", - " },\n", - " annotations=[\n", - " point_annotation,\n", - " polyline_annotation,\n", - " polygon_annotation,\n", - " bbox_annotation,\n", - " radio_annotation,\n", - " bbox_with_checklist_subclass,\n", - " bbox_with_free_text_subclass,\n", - " checklist_annotation,\n", - " polygon_annotation_two,\n", - " nested_checklist_annotation,\n", - " nested_radio_annotation,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "### NDJSON annotations\n", "Here we create the complete label NDJSON payload of annotations only using NDJSON 
format. There is one annotation for each reference to an annotation that we created on *** Supported Python annotation types and NDJSON *** section." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\n\nfor annotations in [\n point_annotation_ndjson,\n polyline_annotation_ndjson,\n polygon_annotation_ndjson,\n bbox_annotation_ndjson,\n radio_annotation_ndjson,\n bbox_with_checklist_subclass_ndjson,\n bbox_with_free_text_subclass_ndjson,\n checklist_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n nested_radio_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "\n", - "for annotations in [\n", - " point_annotation_ndjson,\n", - " polyline_annotation_ndjson,\n", - " polygon_annotation_ndjson,\n", - " bbox_annotation_ndjson,\n", - " radio_annotation_ndjson,\n", - " bbox_with_checklist_subclass_ndjson,\n", - " bbox_with_free_text_subclass_ndjson,\n", - " checklist_annotation_ndjson,\n", - " nested_checklist_annotation_ndjson,\n", - " nested_radio_annotation_ndjson,\n", - "]:\n", - " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson.append(annotations)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or complete labels\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Model-Assisted Labeling (MAL)\n", "For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload MAL label for this data row in project\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"mal_import_job\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload MAL label for this data row in project\n", - "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"mal_import_job\" + str(uuid.uuid4()),\n", - " predictions=labels,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload label for this data row in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_geo_import_job\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload label for this data row in project\n", - "upload_job = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"label_geo_import_job\" + str(uuid.uuid4()),\n", - " labels=labels,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of 
uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/examples/annotation_import/video.ipynb b/examples/annotation_import/video.ipynb index 0546e84be..8a9369c21 100644 --- a/examples/annotation_import/video.ipynb +++ b/examples/annotation_import/video.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Video Annotation Import\n", @@ -42,953 +44,221 @@ " * Polygons \n", "\n", "Please note that this list of unsupported annotations only refers to limitations for importing annotations. For example, when using the Labelbox editor, segmentation masks can be created and edited on video assets." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import uuid\nfrom PIL import Image\nimport requests\nimport base64\nimport labelbox as lb\nimport labelbox.types as lb_types\nfrom io import BytesIO\nimport pprint\n\npp = pprint.PrettyPrinter(indent=4)", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "from PIL import Image\n", - "import requests\n", - "import base64\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "from io import BytesIO\n", - "import pprint\n", - "\n", - "pp = pprint.PrettyPrinter(indent=4)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for video\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box: (frame-based)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Confidence scores are not supported for frame specific bounding box annotations and 
VideoObjectAnnotation class\n\n# bbox dimensions\nbbox_dm = {\"top\": 617, \"left\": 1371, \"height\": 419, \"width\": 505}\n\n# Python Annotation\nbbox_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"],\n y=bbox_dm[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=19,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n]\n\n# NDJSON\nbbox_annotation_ndjson = {\n \"name\":\n \"bbox_video\",\n \"segments\": [{\n \"keyframes\": [\n {\n \"frame\": 13,\n \"bbox\": bbox_dm\n },\n {\n \"frame\": 19,\n \"bbox\": bbox_dm\n },\n ]\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Confidence scores are not supported for frame specific bounding box annotations and VideoObjectAnnotation class\n", - "\n", - "# bbox dimensions\n", - "bbox_dm = {\"top\": 617, \"left\": 1371, \"height\": 419, \"width\": 505}\n", - "\n", - "# Python Annotation\n", - "bbox_annotation = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - " frame=13,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm[\"left\"],\n", - " y=bbox_dm[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", - " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " ),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - 
" frame=19,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", - " end=lb_types.Point(\n", - " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", - " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", - " ),\n", - " ),\n", - " ),\n", - "]\n", - "\n", - "# NDJSON\n", - "bbox_annotation_ndjson = {\n", - " \"name\":\n", - " \"bbox_video\",\n", - " \"segments\": [{\n", - " \"keyframes\": [\n", - " {\n", - " \"frame\": 13,\n", - " \"bbox\": bbox_dm\n", - " },\n", - " {\n", - " \"frame\": 19,\n", - " \"bbox\": bbox_dm\n", - " },\n", - " ]\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Point (frame-based)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Annotation\npoint_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"point_video\",\n keyframe=True,\n frame=17,\n value=lb_types.Point(x=660.134, y=407.926),\n )\n]\n\n# NDJSON\npoint_annotation_ndjson = {\n \"name\":\n \"point_video\",\n \"segments\": [{\n \"keyframes\": [{\n \"frame\": 17,\n \"point\": {\n \"x\": 660.134,\n \"y\": 407.926\n }\n }]\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Annotation\n", - "point_annotation = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"point_video\",\n", - " keyframe=True,\n", - " frame=17,\n", - " value=lb_types.Point(x=660.134, y=407.926),\n", - " )\n", - "]\n", - "\n", - "# NDJSON\n", - "point_annotation_ndjson = {\n", - " \"name\":\n", - " \"point_video\",\n", - " \"segments\": [{\n", - " \"keyframes\": [{\n", - " \"frame\": 17,\n", - " \"point\": {\n", - " \"x\": 660.134,\n", - " \"y\": 407.926\n", - " }\n", - " }]\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Polyline (frame-based)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": 
"code", - "execution_count": null, "metadata": {}, + "source": "######## Polyline ########\n\n# Python Annotation\npolyline_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=5,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=20,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=24,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=45,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n]\n\n# NDJSON\npolyline_frame_annotation_ndjson = {\n \"name\":\n \"line_video_frame\",\n \"segments\": [\n {\n \"keyframes\": [\n {\n \"frame\":\n 5,\n \"line\": [\n {\n \"x\": 680,\n \"y\": 100\n },\n {\n \"x\": 100,\n \"y\": 190\n },\n {\n \"x\": 190,\n \"y\": 220\n },\n ],\n },\n {\n \"frame\":\n 20,\n \"line\": [\n {\n \"x\": 680,\n \"y\": 180\n },\n {\n \"x\": 100,\n \"y\": 200\n },\n {\n \"x\": 200,\n \"y\": 260\n },\n ],\n },\n ]\n },\n {\n \"keyframes\": [\n {\n \"frame\": 24,\n \"line\": [{\n \"x\": 300,\n \"y\": 310\n }, {\n \"x\": 330,\n \"y\": 430\n }],\n },\n {\n \"frame\": 45,\n \"line\": [{\n \"x\": 600,\n \"y\": 810\n }, {\n \"x\": 900,\n \"y\": 930\n }],\n },\n ]\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######## Polyline ########\n", - "\n", - "# Python Annotation\n", - "polyline_annotation = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=5,\n", - " segment_index=0,\n", - " 
value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=20,\n", - " segment_index=0,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=24,\n", - " segment_index=1,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=45,\n", - " segment_index=1,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - "]\n", - "\n", - "# NDJSON\n", - "polyline_frame_annotation_ndjson = {\n", - " \"name\":\n", - " \"line_video_frame\",\n", - " \"segments\": [\n", - " {\n", - " \"keyframes\": [\n", - " {\n", - " \"frame\":\n", - " 5,\n", - " \"line\": [\n", - " {\n", - " \"x\": 680,\n", - " \"y\": 100\n", - " },\n", - " {\n", - " \"x\": 100,\n", - " \"y\": 190\n", - " },\n", - " {\n", - " \"x\": 190,\n", - " \"y\": 220\n", - " },\n", - " ],\n", - " },\n", - " {\n", - " \"frame\":\n", - " 20,\n", - " \"line\": [\n", - " {\n", - " \"x\": 680,\n", - " \"y\": 180\n", - " },\n", - " {\n", - " \"x\": 100,\n", - " \"y\": 200\n", - " },\n", - " {\n", - " \"x\": 200,\n", - " \"y\": 260\n", - " },\n", - " ],\n", - " },\n", - " ]\n", - " },\n", - " {\n", - " \"keyframes\": [\n", - " {\n", - " \"frame\": 24,\n", - " \"line\": [{\n", - " \"x\": 300,\n", - " \"y\": 310\n", - " }, {\n", - " \"x\": 330,\n", - " \"y\": 430\n", - " }],\n", - " },\n", - " {\n", - " \"frame\": 45,\n", - " \"line\": [{\n", - " \"x\": 600,\n", - " \"y\": 810\n", - " }, {\n", - " \"x\": 900,\n", - " \"y\": 
930\n", - " }],\n", - " },\n", - " ]\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Radio and checklist (frame-based)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Annotation\nradio_annotation = [\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=9,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=15,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n ),\n]\n\n## NDJSON\nframe_radio_classification_ndjson = {\n \"name\": \"radio_class\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"frames\": [{\n \"start\": 9,\n \"end\": 15\n }],\n },\n}\n\n# Python annotation\nchecklist_annotation = [\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=29,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=35,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n ),\n]\n\n## NDJSON\nframe_checklist_classification_ndjson = {\n \"name\":\n \"checklist_class\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"frames\": [{\n \"start\": 29,\n \"end\": 35\n }],\n },\n {\n \"name\": \"second_checklist_answer\",\n \"frames\": [{\n \"start\": 29,\n \"end\": 35\n }],\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Annotation\n", - 
"radio_annotation = [\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"radio_class\",\n", - " frame=9,\n", - " segment_index=0,\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - " ),\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"radio_class\",\n", - " frame=15,\n", - " segment_index=0,\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - " ),\n", - "]\n", - "\n", - "## NDJSON\n", - "frame_radio_classification_ndjson = {\n", - " \"name\": \"radio_class\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\",\n", - " \"frames\": [{\n", - " \"start\": 9,\n", - " \"end\": 15\n", - " }],\n", - " },\n", - "}\n", - "\n", - "# Python annotation\n", - "checklist_annotation = [\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " frame=29,\n", - " segment_index=0,\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - " ),\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " frame=35,\n", - " segment_index=0,\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - " ),\n", - "]\n", - "\n", - "## NDJSON\n", - "frame_checklist_classification_ndjson = {\n", - " \"name\":\n", - " \"checklist_class\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\",\n", - " \"frames\": [{\n", - " \"start\": 29,\n", - " \"end\": 35\n", - " }],\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\",\n", - " \"frames\": [{\n", - " \"start\": 29,\n", - " \"end\": 35\n", - " }],\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": 
null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Checklist and radio (global)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "##### Global Classifications #######\n\n# Python Annotation\n## For global classifications use ClassificationAnnotation\nglobal_radio_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"radio_class_global\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n )\n]\n\n# NDJSON\nglobal_radio_classification_ndjson = {\n \"name\": \"radio_class_global\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}\n\n# Python annotation\nglobal_checklist_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_global\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n )\n]\n\n# NDJSON\nglobal_checklist_classification_ndjson = {\n \"name\":\n \"checklist_class_global\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "##### Global Classifications #######\n", - "\n", - "# Python Annotation\n", - "## For global classifications use ClassificationAnnotation\n", - "global_radio_annotation = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"radio_class_global\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - " )\n", - "]\n", - "\n", - "# NDJSON\n", - "global_radio_classification_ndjson = {\n", - " \"name\": \"radio_class_global\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\"\n", - " },\n", - "}\n", - "\n", - "# Python annotation\n", - "global_checklist_annotation = [\n", - " lb_types.ClassificationAnnotation(\n", - " 
name=\"checklist_class_global\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - " )\n", - "]\n", - "\n", - "# NDJSON\n", - "global_checklist_classification_ndjson = {\n", - " \"name\":\n", - " \"checklist_class_global\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\"\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Nested radio and checklist (global)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Nested Global Classification ###########\n\n# Python Annotation\nnested_radio_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n )\n]\n\n# NDJSON\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}\n\n# Python Annotation\nnested_checklist_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n 
name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n )\n]\n\n# NDJSON\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Nested Global Classification ###########\n", - "\n", - "# Python Annotation\n", - "nested_radio_annotation = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - "]\n", - "\n", - "# NDJSON\n", - "nested_radio_annotation_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - "}\n", - "\n", - "# Python Annotation\n", - "nested_checklist_annotation = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - " )\n", - "]\n", - "\n", - "# NDJSON\n", - 
"nested_checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " },\n", - " }],\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Free-form text" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### Free text classification ###########\ntext_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\"),\n )\n]\n\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### Free text classification ###########\n", - "text_annotation = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature\"s name\n", - " value=lb_types.Text(answer=\"sample text\"),\n", - " )\n", - "]\n", - "\n", - "text_annotation_ndjson = {\n", - " \"name\": \"free_text\",\n", - " \"answer\": \"sample text\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box with sub-classifications (frame-based)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Confidence scores are not supported for frame specific bounding box annotations with sub-classifications\n\n# bounding box dimensions\nbbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\n\n# Python Annotation\nframe_bbox_with_checklist_subclass_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n 
keyframe=True,\n frame=10,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"],\n y=bbox_dm2[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=11,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n )\n ],\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"second_checklist_answer\")\n ]),\n )\n ],\n ),\n]\n\nframe_bbox_with_checklist_subclass_annotation_ndjson = {\n \"name\":\n \"bbox_class\",\n \"segments\": [{\n \"keyframes\": [\n {\n \"frame\": 10,\n \"bbox\": bbox_dm2\n },\n {\n \"frame\":\n 11,\n \"bbox\":\n bbox_dm2,\n \"classifications\": [{\n \"name\": \"checklist_class\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\"\n }],\n }],\n },\n {\n \"frame\":\n 13,\n \"bbox\":\n bbox_dm2,\n \"classifications\": [{\n \"name\": \"checklist_class\",\n \"answer\": [{\n \"name\": \"second_checklist_answer\"\n }],\n }],\n },\n ]\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - 
"# Confidence scores are not supported for frame specific bounding box annotations with sub-classifications\n", - "\n", - "# bounding box dimensions\n", - "bbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\n", - "\n", - "# Python Annotation\n", - "frame_bbox_with_checklist_subclass_annotation = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_class\",\n", - " keyframe=True,\n", - " frame=10,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm2[\"left\"],\n", - " y=bbox_dm2[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", - " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " ),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_class\",\n", - " keyframe=True,\n", - " frame=11,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", - " end=lb_types.Point(\n", - " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", - " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", - " ),\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_class\",\n", - " keyframe=True,\n", - " frame=13,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", - " end=lb_types.Point(\n", - " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", - " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", - " ),\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " 
value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"second_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " ),\n", - "]\n", - "\n", - "frame_bbox_with_checklist_subclass_annotation_ndjson = {\n", - " \"name\":\n", - " \"bbox_class\",\n", - " \"segments\": [{\n", - " \"keyframes\": [\n", - " {\n", - " \"frame\": 10,\n", - " \"bbox\": bbox_dm2\n", - " },\n", - " {\n", - " \"frame\":\n", - " 11,\n", - " \"bbox\":\n", - " bbox_dm2,\n", - " \"classifications\": [{\n", - " \"name\": \"checklist_class\",\n", - " \"answer\": [{\n", - " \"name\": \"first_checklist_answer\"\n", - " }],\n", - " }],\n", - " },\n", - " {\n", - " \"frame\":\n", - " 13,\n", - " \"bbox\":\n", - " bbox_dm2,\n", - " \"classifications\": [{\n", - " \"name\": \"checklist_class\",\n", - " \"answer\": [{\n", - " \"name\": \"second_checklist_answer\"\n", - " }],\n", - " }],\n", - " },\n", - " ]\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Masks (frame-based)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "def extract_rgb_colors_from_url(image_url):\n response = requests.get(image_url)\n img = Image.open(BytesIO(response.content))\n\n colors = set()\n for x in range(img.width):\n for y in range(img.height):\n pixel = img.getpixel((x, y))\n if pixel[:3] != (0, 0, 0):\n colors.add(pixel[:3]) # Get only the RGB values\n\n return colors", + "cell_type": "code", "outputs": [], - "source": [ - "def extract_rgb_colors_from_url(image_url):\n", - " response = requests.get(image_url)\n", - " img = Image.open(BytesIO(response.content))\n", - "\n", - " colors = set()\n", - " for x in range(img.width):\n", - " for y in range(img.height):\n", - " pixel = img.getpixel((x, y))\n", - " if pixel[:3] != (0, 0, 0):\n", - " colors.add(pixel[:3]) # Get only the RGB values\n", - "\n", - " return colors" - ] + "execution_count": null }, { - 
"cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "### Raster Segmentation (Byte string array)\n## For this example we are going to to pass all the annotations payload in a single VideoMaskAnnotation\n\n# Single mask\nurl = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_24_composite_mask.png\"\nresponse = requests.get(url)\nimg_bytes = base64.b64encode(response.content).decode(\"utf-8\")\n\n# We are generating our frames and instances in this step, and will later add them to the VideoMaskAnnotation that will contain\n# all frames and instances\nframes_mask_single = [\n lb_types.MaskFrame(\n index=20,\n im_bytes=response.\n content, # Instead of bytes you could also pass an instance URI : instance_uri=url\n )\n]\ninstances_mask_single = [\n lb_types.MaskInstance(color_rgb=(76, 104, 177), name=\"video_mask\")\n]\n\n## Add multiple masks using multiple tools in different frames - Note that only once composite mask can exist per frame\nframes_cp_mask_url = [\n {\n \"1\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_1_composite_mask.png\"\n },\n {\n \"24\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_24_composite_mask.png\"\n },\n {\n \"26\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_26_composite_mask.png\"\n },\n]\n\nrgb_mask_tool = [(227, 135, 126), (169, 248, 152), (83, 152, 103)]\ncp_masks = []\nunique_colors = set()\n\nlb_frames = []\nlb_instances = []\ncounter = 0\n\nfor d in frames_cp_mask_url:\n for frame_no, v in d.items():\n response = requests.get(v)\n colors = extract_rgb_colors_from_url(v)\n for color in colors:\n if not color in unique_colors:\n unique_colors.add(color)\n name = (\"video_mask\" if color in rgb_mask_tool else\n \"mask_with_text_subclass\")\n lb_instances.append(\n lb_types.MaskInstance(color_rgb=color, name=name))\n counter += 1\n lb_frames.append(\n lb_types.MaskFrame(index=frame_no, 
im_bytes=response.content))\ncp_masks.append(\n lb_types.VideoMaskAnnotation(\n frames=lb_frames + frames_mask_single,\n instances=lb_instances + instances_mask_single,\n ))\n\npp.pprint(lb_frames)\npp.pprint(cp_masks)\n\n# NDJSON - single tool\nvideo_mask_ndjson_bytes_2 = {\n \"masks\": {\n \"frames\": [\n {\n \"index\": 31,\n \"imBytes\": img_bytes,\n },\n {\n \"index\": 34,\n \"imBytes\": img_bytes,\n },\n ],\n \"instances\": [{\n \"colorRGB\": [76, 104, 177],\n \"name\": \"video_mask\"\n }],\n }\n}", + "cell_type": "code", "outputs": [], - "source": [ - "### Raster Segmentation (Byte string array)\n", - "## For this example we are going to to pass all the annotations payload in a single VideoMaskAnnotation\n", - "\n", - "# Single mask\n", - "url = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_24_composite_mask.png\"\n", - "response = requests.get(url)\n", - "img_bytes = base64.b64encode(response.content).decode(\"utf-8\")\n", - "\n", - "# We are generating our frames and instances in this step, and will later add them to the VideoMaskAnnotation that will contain\n", - "# all frames and instances\n", - "frames_mask_single = [\n", - " lb_types.MaskFrame(\n", - " index=20,\n", - " im_bytes=response.\n", - " content, # Instead of bytes you could also pass an instance URI : instance_uri=url\n", - " )\n", - "]\n", - "instances_mask_single = [\n", - " lb_types.MaskInstance(color_rgb=(76, 104, 177), name=\"video_mask\")\n", - "]\n", - "\n", - "## Add multiple masks using multiple tools in different frames - Note that only once composite mask can exist per frame\n", - "frames_cp_mask_url = [\n", - " {\n", - " \"1\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_1_composite_mask.png\"\n", - " },\n", - " {\n", - " \"24\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_24_composite_mask.png\"\n", - " },\n", - " {\n", - " \"26\":\n", - " 
\"https://storage.googleapis.com/labelbox-datasets/image_sample_data/frame_26_composite_mask.png\"\n", - " },\n", - "]\n", - "\n", - "rgb_mask_tool = [(227, 135, 126), (169, 248, 152), (83, 152, 103)]\n", - "cp_masks = []\n", - "unique_colors = set()\n", - "\n", - "lb_frames = []\n", - "lb_instances = []\n", - "counter = 0\n", - "\n", - "for d in frames_cp_mask_url:\n", - " for frame_no, v in d.items():\n", - " response = requests.get(v)\n", - " colors = extract_rgb_colors_from_url(v)\n", - " for color in colors:\n", - " if not color in unique_colors:\n", - " unique_colors.add(color)\n", - " name = (\"video_mask\" if color in rgb_mask_tool else\n", - " \"mask_with_text_subclass\")\n", - " lb_instances.append(\n", - " lb_types.MaskInstance(color_rgb=color, name=name))\n", - " counter += 1\n", - " lb_frames.append(\n", - " lb_types.MaskFrame(index=frame_no, im_bytes=response.content))\n", - "cp_masks.append(\n", - " lb_types.VideoMaskAnnotation(\n", - " frames=lb_frames + frames_mask_single,\n", - " instances=lb_instances + instances_mask_single,\n", - " ))\n", - "\n", - "pp.pprint(lb_frames)\n", - "pp.pprint(cp_masks)\n", - "\n", - "# NDJSON - single tool\n", - "video_mask_ndjson_bytes_2 = {\n", - " \"masks\": {\n", - " \"frames\": [\n", - " {\n", - " \"index\": 31,\n", - " \"imBytes\": img_bytes,\n", - " },\n", - " {\n", - " \"index\": 34,\n", - " \"imBytes\": img_bytes,\n", - " },\n", - " ],\n", - " \"instances\": [{\n", - " \"colorRGB\": [76, 104, 177],\n", - " \"name\": \"video_mask\"\n", - " }],\n", - " }\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Multiple instances of bounding box annotations in the same frame" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Fist instance of bounding box ranging from frame 22 to 27\nbbox_annotation_1 = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=22,\n 
segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"],\n y=bbox_dm[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=27,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n]\n# NDJSON example:\nbbox_frame_annotation_ndjson = {\n \"name\":\n \"bbox_video\",\n \"segments\": [{\n \"keyframes\": [\n {\n \"frame\": 22,\n \"bbox\": bbox_dm\n },\n {\n \"frame\": 27,\n \"bbox\": bbox_dm2\n },\n ]\n }],\n}\n\n# Second instance of bounding box ranging from frame 22 to 27\nbbox_annotation_2 = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=22,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=27,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n ),\n]\n# NDJSON\nbbox_frame_annotation_ndjson2 = {\n \"name\":\n \"bbox_video\",\n \"segments\": [{\n \"keyframes\": [\n {\n \"frame\": 22,\n \"bbox\": bbox_dm\n },\n {\n \"frame\": 27,\n \"bbox\": bbox_dm2\n },\n ]\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Fist instance of bounding box ranging from frame 22 to 27\n", - "bbox_annotation_1 = [\n", - " 
lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - " frame=22,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm[\"left\"],\n", - " y=bbox_dm[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", - " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " ),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - " frame=27,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", - " end=lb_types.Point(\n", - " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", - " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", - " ),\n", - " ),\n", - " ),\n", - "]\n", - "# NDJSON example:\n", - "bbox_frame_annotation_ndjson = {\n", - " \"name\":\n", - " \"bbox_video\",\n", - " \"segments\": [{\n", - " \"keyframes\": [\n", - " {\n", - " \"frame\": 22,\n", - " \"bbox\": bbox_dm\n", - " },\n", - " {\n", - " \"frame\": 27,\n", - " \"bbox\": bbox_dm2\n", - " },\n", - " ]\n", - " }],\n", - "}\n", - "\n", - "# Second instance of bounding box ranging from frame 22 to 27\n", - "bbox_annotation_2 = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - " frame=22,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", - " end=lb_types.Point(\n", - " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", - " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", - " ),\n", - " ),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - " frame=27,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", - " end=lb_types.Point(\n", - " 
x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", - " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", - " ),\n", - " ),\n", - " ),\n", - "]\n", - "# NDJSON\n", - "bbox_frame_annotation_ndjson2 = {\n", - " \"name\":\n", - " \"bbox_video\",\n", - " \"segments\": [{\n", - " \"keyframes\": [\n", - " {\n", - " \"frame\": 22,\n", - " \"bbox\": bbox_dm\n", - " },\n", - " {\n", - " \"frame\": 27,\n", - " \"bbox\": bbox_dm2\n", - " },\n", - " ]\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## End-to-end example: Import pre-labels or ground truth" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "global_key = \"sample-video-jellyfish.mp4\" + str(uuid.uuid4())\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\",\n \"global_key\":\n global_key,\n \"media_type\":\n \"VIDEO\",\n}\n\ndataset = client.create_dataset(\n name=\"video_demo_dataset\",\n iam_integration=\n None, # If this argument is removed, labelbox will use the default integration for your organization.\n)\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")", + "cell_type": "code", "outputs": [], - "source": [ - "global_key = \"sample-video-jellyfish.mp4\" + str(uuid.uuid4())\n", - "asset = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\",\n", - " \"global_key\":\n", - " global_key,\n", - " \"media_type\":\n", - " \"VIDEO\",\n", - "}\n", - "\n", - "dataset = client.create_dataset(\n", - " name=\"video_demo_dataset\",\n", - " iam_integration=\n", - " None, # If this argument is removed, labelbox will use the 
default integration for your organization.\n", - ")\n", - "task = dataset.create_data_rows([asset])\n", - "task.wait_till_done()\n", - "print(f\"Failed data rows: {task.failed_data_rows}\")\n", - "print(f\"Errors: {task.errors}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an ontology\n", @@ -998,347 +268,140 @@ "\n", "\n", "[Documentation for reference ](https://docs.labelbox.com/reference/import-text-annotations)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_video\"),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_video\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_video_frame\"),\n lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"video_mask\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_class\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"checklist_class\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.RASTER_SEGMENTATION,\n name=\"mask_with_text_subclass\",\n classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"sub_free_text\")\n ],\n ),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class\",\n scope=lb.Classification.Scope.\n INDEX, ## Need to defined scope for frame classifications\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_class\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n 
lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_class_global\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class_global\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Video Annotation Import Demo Ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Video,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(\n", - " tools=[\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_video\"),\n", - " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_video\"),\n", - " lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_video_frame\"),\n", - " lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"video_mask\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_class\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"checklist_class\",\n", 
- " scope=lb.Classification.Scope.INDEX,\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.RASTER_SEGMENTATION,\n", - " name=\"mask_with_text_subclass\",\n", - " classifications=[\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"sub_free_text\")\n", - " ],\n", - " ),\n", - " ],\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_class\",\n", - " scope=lb.Classification.Scope.\n", - " INDEX, ## Need to defined scope for frame classifications\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_class\",\n", - " scope=lb.Classification.Scope.INDEX,\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " 
],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_class_global\",\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_class_global\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\"),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Video Annotation Import Demo Ontology\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Video,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Create a labeling project \n", "Connect the ontology to the labeling project." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project = client.create_project(name=\"Video Annotation Import Demo\",\n media_type=lb.MediaType.Video)\n\n## connect ontology to your project\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "project = client.create_project(name=\"Video Annotation Import Demo\",\n", - " media_type=lb.MediaType.Video)\n", - "\n", - "## connect ontology to your project\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "batch = project.create_batch(\n \"first-batch-video-demo2\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # A paginated collection of data row objects, a list of data rows or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", + "cell_type": "code", "outputs": [], - "source": [ - "batch = project.create_batch(\n", - " \"first-batch-video-demo2\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # A paginated collection of data row objects, a list of data rows or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "print(\"Batch: \", batch)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5: Create the annotations payload \n", "Create the annotations payload using the snippets of code above.\n", "\n", "Labelbox supports two formats for the annotations payload: NDJSON and Python Annotation types." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Python Annotation Types" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label = []\nannotations_list = [\n checklist_annotation,\n radio_annotation,\n bbox_annotation,\n frame_bbox_with_checklist_subclass_annotation,\n bbox_annotation_1,\n bbox_annotation_2,\n point_annotation,\n polyline_annotation,\n global_checklist_annotation,\n global_radio_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n text_annotation,\n cp_masks,\n]\n\nfor annotation in annotations_list:\n label.append(\n lb_types.Label(data={\"global_key\": global_key}, annotations=annotation))", + "cell_type": "code", "outputs": [], - "source": [ - "label = []\n", - "annotations_list = [\n", - " checklist_annotation,\n", - " radio_annotation,\n", - " bbox_annotation,\n", - " frame_bbox_with_checklist_subclass_annotation,\n", - " bbox_annotation_1,\n", - " bbox_annotation_2,\n", - " point_annotation,\n", - " polyline_annotation,\n", - " global_checklist_annotation,\n", - " global_radio_annotation,\n", - " nested_checklist_annotation,\n", - " nested_radio_annotation,\n", - " text_annotation,\n", - " cp_masks,\n", - "]\n", - "\n", - "for annotation in annotations_list:\n", - " label.append(\n", - " lb_types.Label(data={\"global_key\": global_key}, annotations=annotation))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### NDJSON annotations\n", "Here we create the complete `label_ndjson` payload of annotations. There is one annotation for each *reference to an annotation* that we created above." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "First, let\"s update the bbox with nested classifications with the corresponding featureSchemaId" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\n\nannotations_list_ndjson = [\n point_annotation_ndjson,\n bbox_annotation_ndjson,\n polyline_frame_annotation_ndjson,\n frame_checklist_classification_ndjson,\n frame_radio_classification_ndjson,\n nested_radio_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n frame_bbox_with_checklist_subclass_annotation_ndjson,\n global_radio_classification_ndjson,\n global_checklist_classification_ndjson,\n text_annotation_ndjson,\n bbox_frame_annotation_ndjson,\n bbox_frame_annotation_ndjson2,\n video_mask_ndjson_bytes_2,\n]\n\nfor annotation in annotations_list_ndjson:\n annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotation)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "\n", - "annotations_list_ndjson = [\n", - " point_annotation_ndjson,\n", - " bbox_annotation_ndjson,\n", - " polyline_frame_annotation_ndjson,\n", - " frame_checklist_classification_ndjson,\n", - " frame_radio_classification_ndjson,\n", - " nested_radio_annotation_ndjson,\n", - " nested_checklist_annotation_ndjson,\n", - " frame_bbox_with_checklist_subclass_annotation_ndjson,\n", - " global_radio_classification_ndjson,\n", - " global_checklist_classification_ndjson,\n", - " text_annotation_ndjson,\n", - " bbox_frame_annotation_ndjson,\n", - " bbox_frame_annotation_ndjson2,\n", - " video_mask_ndjson_bytes_2,\n", - "]\n", - "\n", - "for annotation in annotations_list_ndjson:\n", - " annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson.append(annotation)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Upload 
annotations to a project as pre-labels or completed labels\n", "For the purpose of this tutorial only run one of the label imports at once, otherwise the previous import might get overwritten." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Model-Assisted Labeling (MAL)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload MAL label for this data row in project\nupload_job_mal = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"mal_import_job-\" + str(uuid.uuid4()),\n predictions=label,\n)\n\nupload_job_mal.wait_until_done()\nprint(\"Errors:\", upload_job_mal.errors)\nprint(\"Status of uploads: \", upload_job_mal.statuses)\nprint(\" \")", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload MAL label for this data row in project\n", - "upload_job_mal = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"mal_import_job-\" + str(uuid.uuid4()),\n", - " predictions=label,\n", - ")\n", - "\n", - "upload_job_mal.wait_until_done()\n", - "print(\"Errors:\", upload_job_mal.errors)\n", - "print(\"Status of uploads: \", upload_job_mal.statuses)\n", - "print(\" \")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# For this demo either run MAL or Ground truth import, not both.\n\n# upload_job_label_import = lb.LabelImport.create_from_objects(\n# client = client,\n# project_id = project.uid,\n# name = \"label_import_job-\" + str(uuid.uuid4()),\n# labels=label\n# )\n\n# upload_job_label_import.wait_until_done()\n# print(\"Errors:\", upload_job_label_import.errors)\n# print(\"Status of uploads: \", upload_job_label_import.statuses)\n# print(\" \")", + 
"cell_type": "code", "outputs": [], - "source": [ - "# For this demo either run MAL or Ground truth import, not both.\n", - "\n", - "# upload_job_label_import = lb.LabelImport.create_from_objects(\n", - "# client = client,\n", - "# project_id = project.uid,\n", - "# name = \"label_import_job-\" + str(uuid.uuid4()),\n", - "# labels=label\n", - "# )\n", - "\n", - "# upload_job_label_import.wait_until_done()\n", - "# print(\"Errors:\", upload_job_label_import.errors)\n", - "# print(\"Status of uploads: \", upload_job_label_import.statuses)\n", - "# print(\" \")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Delete Project\n# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# Delete Project\n", - "# project.delete()\n", - "# dataset.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/examples/basics/data_row_metadata.ipynb b/examples/basics/data_row_metadata.ipynb index 6e0df0157..8a63a0792 100644 --- a/examples/basics/data_row_metadata.ipynb +++ b/examples/basics/data_row_metadata.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,19 +24,19 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Data Row Metadata\n", "\n", "Metadata is useful to better understand data on the platform to help with labeling review, model diagnostics, and data selection. 
This **should not be confused with attachments**. Attachments provide additional context for labelers but is not searchable within Catalog." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Metadata ontology\n", @@ -57,115 +59,87 @@ "\n", "* **Embedding**: 128 float 32 vector used for similarity. To upload custom embeddings use the following [tutorial](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/basics/custom_embeddings.ipynb)\n", "* Any metadata kind can be customized" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nfrom datetime import datetime\nfrom pprint import pprint\nfrom labelbox.schema.data_row_metadata import DataRowMetadataKind\nfrom uuid import uuid4", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "from datetime import datetime\n", - "from pprint import pprint\n", - "from labelbox.schema.data_row_metadata import DataRowMetadataKind\n", - "from uuid import uuid4" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your API key\nAPI_KEY = \"\"\n# To get your API key go to: Workspace settings -> API -> Create API Key\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your API key\n", - "API_KEY = \"\"\n", - "# To get your API key go to: Workspace settings -> API -> Create API Key\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": 
"markdown", "metadata": {}, "source": [ "### Get the current metadata ontology " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "mdo = client.get_data_row_metadata_ontology()", + "cell_type": "code", "outputs": [], - "source": [ - "mdo = client.get_data_row_metadata_ontology()" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# list all your metadata ontology as a dictionary accessable by id\nmetadata_ontologies = mdo.fields_by_id\npprint(metadata_ontologies, indent=2)", + "cell_type": "code", "outputs": [], - "source": [ - "# list all your metadata ontology as a dictionary accessable by id\n", - "metadata_ontologies = mdo.fields_by_id\n", - "pprint(metadata_ontologies, indent=2)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Access metadata by name" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "split_field = mdo.reserved_by_name[\"split\"]\nsplit_field", + "cell_type": "code", "outputs": [], - "source": [ - "split_field = mdo.reserved_by_name[\"split\"]\n", - "split_field" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "tag_field = mdo.reserved_by_name[\"tag\"]\ntag_field", + "cell_type": "code", "outputs": [], - "source": [ - "tag_field = mdo.reserved_by_name[\"tag\"]\n", - "tag_field" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "train_field = mdo.reserved_by_name[\"split\"][\"train\"]\ntrain_field", + "cell_type": "code", "outputs": [], - "source": [ - "train_field = mdo.reserved_by_name[\"split\"][\"train\"]\n", - "train_field" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Construct metadata fields for existing metadata 
schemas\n", @@ -174,283 +148,153 @@ "\n", "\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Option 1: Specify metadata with a list of `DataRowMetadataField` objects. This is the recommended option since it comes with validation for metadata fields." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Construct a metadata field of string kind\ntag_metadata_field = lb.DataRowMetadataField(\n name=\"tag\",\n value=\"tag_string\",\n)\n\n# Construct an metadata field of datetime kind\ncapture_datetime_field = lb.DataRowMetadataField(\n name=\"captureDateTime\",\n value=datetime.utcnow(),\n)\n\n# Construct a metadata field of Enums options\nsplit_metadata_field = lb.DataRowMetadataField(\n name=\"split\",\n value=\"train\",\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# Construct a metadata field of string kind\n", - "tag_metadata_field = lb.DataRowMetadataField(\n", - " name=\"tag\",\n", - " value=\"tag_string\",\n", - ")\n", - "\n", - "# Construct an metadata field of datetime kind\n", - "capture_datetime_field = lb.DataRowMetadataField(\n", - " name=\"captureDateTime\",\n", - " value=datetime.utcnow(),\n", - ")\n", - "\n", - "# Construct a metadata field of Enums options\n", - "split_metadata_field = lb.DataRowMetadataField(\n", - " name=\"split\",\n", - " value=\"train\",\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Option 2: You can also specify the metadata fields with dictionary format without declaring the `DataRowMetadataField` objects.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Construct a dictionary of string metadata\ntag_metadata_field_dict = {\n \"name\": \"tag\",\n \"value\": \"tag_string\",\n}\n\n# Construct a dictionary of datetime metadata\ncapture_datetime_field_dict = {\n 
\"name\": \"captureDateTime\",\n \"value\": datetime.utcnow(),\n}\n\n# Construct a dictionary of Enums options metadata\nsplit_metadata_field_dict = {\n \"name\": \"split\",\n \"value\": \"train\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Construct a dictionary of string metadata\n", - "tag_metadata_field_dict = {\n", - " \"name\": \"tag\",\n", - " \"value\": \"tag_string\",\n", - "}\n", - "\n", - "# Construct a dictionary of datetime metadata\n", - "capture_datetime_field_dict = {\n", - " \"name\": \"captureDateTime\",\n", - " \"value\": datetime.utcnow(),\n", - "}\n", - "\n", - "# Construct a dictionary of Enums options metadata\n", - "split_metadata_field_dict = {\n", - " \"name\": \"split\",\n", - " \"value\": \"train\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Create a custom metadata schema with their corresponding fields\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Final\ncustom_metadata_fields = []\n\n# Create the schema for the metadata\nnumber_schema = mdo.create_schema(name=\"numberMetadataCustom\",\n kind=DataRowMetadataKind.number)\n\n# Add fields to the metadata schema\ndata_row_metadata_fields_number = lb.DataRowMetadataField(\n name=number_schema.name, value=5.0)\n\ncustom_metadata_fields.append(data_row_metadata_fields_number)", + "cell_type": "code", "outputs": [], - "source": [ - "# Final\n", - "custom_metadata_fields = []\n", - "\n", - "# Create the schema for the metadata\n", - "number_schema = mdo.create_schema(name=\"numberMetadataCustom\",\n", - " kind=DataRowMetadataKind.number)\n", - "\n", - "# Add fields to the metadata schema\n", - "data_row_metadata_fields_number = lb.DataRowMetadataField(\n", - " name=number_schema.name, value=5.0)\n", - "\n", - "custom_metadata_fields.append(data_row_metadata_fields_number)" - ] + "execution_count": null }, { - "cell_type": "code", - 
"execution_count": null, "metadata": {}, + "source": "# Create the schema for an enum metadata\ncustom_metadata_fields = []\n\nenum_schema = mdo.create_schema(\n name=\"enumMetadata\",\n kind=DataRowMetadataKind.enum,\n options=[\"option1\", \"option2\"],\n)\n\n# Add fields to the metadata schema\ndata_row_metadata_fields_enum_1 = lb.DataRowMetadataField(name=enum_schema.name,\n value=\"option1\")\ncustom_metadata_fields.append(data_row_metadata_fields_enum_1)\n\ndata_row_metadata_fields_enum_2 = lb.DataRowMetadataField(name=enum_schema.name,\n value=\"option2\")\ncustom_metadata_fields.append(data_row_metadata_fields_enum_2)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create the schema for an enum metadata\n", - "custom_metadata_fields = []\n", - "\n", - "enum_schema = mdo.create_schema(\n", - " name=\"enumMetadata\",\n", - " kind=DataRowMetadataKind.enum,\n", - " options=[\"option1\", \"option2\"],\n", - ")\n", - "\n", - "# Add fields to the metadata schema\n", - "data_row_metadata_fields_enum_1 = lb.DataRowMetadataField(name=enum_schema.name,\n", - " value=\"option1\")\n", - "custom_metadata_fields.append(data_row_metadata_fields_enum_1)\n", - "\n", - "data_row_metadata_fields_enum_2 = lb.DataRowMetadataField(name=enum_schema.name,\n", - " value=\"option2\")\n", - "custom_metadata_fields.append(data_row_metadata_fields_enum_2)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Inspect the newly created metadata schemas\nmetadata_ontologies = mdo.fields_by_id\npprint(metadata_ontologies, indent=2)", + "cell_type": "code", "outputs": [], - "source": [ - "# Inspect the newly created metadata schemas\n", - "metadata_ontologies = mdo.fields_by_id\n", - "pprint(metadata_ontologies, indent=2)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Create data rows with metadata\n", "\n", "See our [documentation](https://docs.labelbox.com/docs/limits) 
for information on limits for uploading data rows in a single API operation." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# A simple example of uploading data rows with metadata\ndataset = client.create_dataset(\n name=\"Simple Data Rows import with metadata example\")\nglobal_key = \"s_basic.jpg\" + str(uuid4())\ndata_row = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/basic.jpg\",\n \"global_key\":\n global_key,\n}\n# This line works with dictionaries as well as schemas and fields created with DataRowMetadataField\ndata_row[\"metadata_fields\"] = custom_metadata_fields + [\n split_metadata_field,\n capture_datetime_field_dict,\n tag_metadata_field,\n]\n\ntask = dataset.create_data_rows([data_row])\ntask.wait_till_done()\nresult_task = task.result\nprint(result_task)", + "cell_type": "code", "outputs": [], - "source": [ - "# A simple example of uploading data rows with metadata\n", - "dataset = client.create_dataset(\n", - " name=\"Simple Data Rows import with metadata example\")\n", - "global_key = \"s_basic.jpg\" + str(uuid4())\n", - "data_row = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/basic.jpg\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "# This line works with dictionaries as well as schemas and fields created with DataRowMetadataField\n", - "data_row[\"metadata_fields\"] = custom_metadata_fields + [\n", - " split_metadata_field,\n", - " capture_datetime_field_dict,\n", - " tag_metadata_field,\n", - "]\n", - "\n", - "task = dataset.create_data_rows([data_row])\n", - "task.wait_till_done()\n", - "result_task = task.result\n", - "print(result_task)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Update data row metadata" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + 
"source": "# Get the data row that was uploaded in the previous cell\nnum_schema = mdo.get_by_name(\"numberMetadataCustom\")\n\n# Update the metadata\nupdated_metadata = lb.DataRowMetadataField(schema_id=num_schema.uid, value=10.2)\n\n# Create data row payload\ndata_row_payload = lb.DataRowMetadata(global_key=global_key,\n fields=[updated_metadata])\n\n# Upsert the fields with the update metadata for number-metadata\nmdo.bulk_upsert([data_row_payload])", + "cell_type": "code", "outputs": [], - "source": [ - "# Get the data row that was uploaded in the previous cell\n", - "num_schema = mdo.get_by_name(\"numberMetadataCustom\")\n", - "\n", - "# Update the metadata\n", - "updated_metadata = lb.DataRowMetadataField(schema_id=num_schema.uid, value=10.2)\n", - "\n", - "# Create data row payload\n", - "data_row_payload = lb.DataRowMetadata(global_key=global_key,\n", - " fields=[updated_metadata])\n", - "\n", - "# Upsert the fields with the update metadata for number-metadata\n", - "mdo.bulk_upsert([data_row_payload])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Update metadata schema" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# update a name\nnumber_schema = mdo.update_schema(name=\"numberMetadataCustom\",\n new_name=\"numberMetadataCustomNew\")\n\n# update an Enum metadata schema option's name, this only applies to Enum metadata schema.\nenum_schema = mdo.update_enum_option(name=\"enumMetadata\",\n option=\"option1\",\n new_option=\"option3\")", + "cell_type": "code", "outputs": [], - "source": [ - "# update a name\n", - "number_schema = mdo.update_schema(name=\"numberMetadataCustom\",\n", - " new_name=\"numberMetadataCustomNew\")\n", - "\n", - "# update an Enum metadata schema option's name, this only applies to Enum metadata schema.\n", - "enum_schema = mdo.update_enum_option(name=\"enumMetadata\",\n", - " option=\"option1\",\n", - " 
new_option=\"option3\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Accessing metadata\n", "\n", "You can examine an individual data row, including its metadata." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "data_row = next(dataset.data_rows())\nfor metadata_field in data_row.metadata_fields:\n print(metadata_field[\"name\"], \":\", metadata_field[\"value\"])", + "cell_type": "code", "outputs": [], - "source": [ - "data_row = next(dataset.data_rows())\n", - "for metadata_field in data_row.metadata_fields:\n", - " print(metadata_field[\"name\"], \":\", metadata_field[\"value\"])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "You can bulk export metadata using data row IDs." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "data_rows_metadata = mdo.bulk_export([data_row.uid])\nlen(data_rows_metadata)", + "cell_type": "code", "outputs": [], - "source": [ - "data_rows_metadata = mdo.bulk_export([data_row.uid])\n", - "len(data_rows_metadata)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Delete custom metadata schema \n", "You can delete custom metadata schema by name. If you wish to delete a metadata schema, uncomment the line below and insert the desired name." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# status = mdo.delete_schema(name=\"\")", + "cell_type": "code", "outputs": [], - "source": [ - "# status = mdo.delete_schema(name=\"\")" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/examples/model_experiments/custom_metrics_demo.ipynb b/examples/model_experiments/custom_metrics_demo.ipynb index 7fff1a770..ed8516d2a 100644 --- a/examples/model_experiments/custom_metrics_demo.ipynb +++ b/examples/model_experiments/custom_metrics_demo.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "----\n", @@ -36,1528 +38,236 @@ " * Iterate faster\n", " * Measure and report on model quality\n", " * Understand marginal value of additional labels and modeling efforts\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Environment setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import uuid\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "import requests\n", - "import labelbox as lb\n", - "import 
labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Radio (single-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "radio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.1\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n )),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.1\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n}", + "cell_type": "code", "outputs": [], - 
"source": [ - "radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.1\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "# NDJSON\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.1\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: checklist (multi-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n 
\"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n lb_types.ClassificationAnswer(\n name=\"second_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n ]),\n)\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n {\n \"name\":\n \"second_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " 
name=\"first_checklist_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " ),\n", - " lb_types.ClassificationAnswer(\n", - " name=\"second_checklist_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " ),\n", - " ]),\n", - ")\n", - "checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"checklist_question\",\n", - " \"answer\": [\n", - " {\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - " {\n", - " \"name\":\n", - " 
\"second_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Nested radio and checklist" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "nested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n )),\n )\n ],\n 
)),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\":\n \"nested_radio_question\",\n \"confidence\":\n 0.5,\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n }],\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": 
\"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"confidence\":\n 0.5,\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\":\n \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n },\n }],\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " 
\"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332,\n", - " },\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_radio_question\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " 
},\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - " }],\n", - "}\n", - "\n", - "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332,\n", - " },\n", - " ],\n", - 
" )\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332,\n", - " },\n", - " ],\n", - " },\n", - " }],\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding Box" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "bbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": 
\"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"bbox\": {\n \"top\": 977,\n \"left\": 1690,\n \"height\": 330,\n \"width\": 225\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "bbox_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", - " end=lb_types.Point(x=1915,\n", - " y=1307), # x= left + width , y = top + height\n", - " ),\n", - ")\n", - "\n", - "bbox_prediction_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"confidence\": 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " 
\"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"bbox\": {\n", - " \"top\": 977,\n", - " \"left\": 1690,\n", - " \"height\": 330,\n", - " \"width\": 225\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box with nested classification " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": 
\"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n )),\n )\n ],\n)\n## NDJSON\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n }],\n \"bbox\": {\n \"top\": 933,\n \"left\": 541,\n \"height\": 191,\n \"width\": 330\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.2\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.1\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.3\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 23\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", - " end=lb_types.Point(x=871, y=1124), # 
x= left + width , y = top + height\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.2\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.1\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.3\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 23\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332,\n", - " },\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - ")\n", - "## NDJSON\n", - "bbox_with_radio_subclass_prediction_ndjson = {\n", - " \"name\": \"bbox_with_radio_subclass\",\n", - " \"confidence\": 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.2\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.1\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.3\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 23\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.2\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.1\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.3\n", - " 
},\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 23\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - " }],\n", - " \"bbox\": {\n", - " \"top\": 933,\n", - " \"left\": 541,\n", - " \"height\": 191,\n", - " \"width\": 330\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Polygon" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Anotation\npolygon_prediction = lb_types.ObjectAnnotation(\n name=\"polygon\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\npolygon_prediction_ndjson = {\n \"name\":\n \"polygon\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n 
\"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"polygon\": [\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n {\n \"x\": 2278.306,\n \"y\": 256.885\n },\n {\n \"x\": 2428.197,\n \"y\": 200.437\n },\n {\n \"x\": 2560.0,\n \"y\": 335.419\n },\n {\n \"x\": 2557.386,\n \"y\": 503.165\n },\n {\n \"x\": 2320.596,\n \"y\": 503.103\n },\n {\n \"x\": 2156.083,\n \"y\": 628.943\n },\n {\n \"x\": 2161.111,\n \"y\": 785.519\n },\n {\n \"x\": 2002.115,\n \"y\": 894.647\n },\n {\n \"x\": 1838.456,\n \"y\": 877.874\n },\n {\n \"x\": 1436.53,\n \"y\": 874.636\n },\n {\n \"x\": 1411.403,\n \"y\": 758.579\n },\n {\n \"x\": 1353.853,\n \"y\": 751.74\n },\n {\n \"x\": 1345.264,\n \"y\": 453.461\n },\n {\n \"x\": 1426.011,\n \"y\": 421.129\n },\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Anotation\n", - "polygon_prediction = lb_types.ObjectAnnotation(\n", - " name=\"polygon\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " value=lb_types.Polygon(points=[\n", - " lb_types.Point(x=1489.581, y=183.934),\n", - " lb_types.Point(x=2278.306, y=256.885),\n", - " lb_types.Point(x=2428.197, y=200.437),\n", - " lb_types.Point(x=2560.0, y=335.419),\n", - " lb_types.Point(x=2557.386, y=503.165),\n", - " lb_types.Point(x=2320.596, y=503.103),\n", - " lb_types.Point(x=2156.083, y=628.943),\n", - " lb_types.Point(x=2161.111, y=785.519),\n", - " 
lb_types.Point(x=2002.115, y=894.647),\n", - " lb_types.Point(x=1838.456, y=877.874),\n", - " lb_types.Point(x=1436.53, y=874.636),\n", - " lb_types.Point(x=1411.403, y=758.579),\n", - " lb_types.Point(x=1353.853, y=751.74),\n", - " lb_types.Point(x=1345.264, y=453.461),\n", - " lb_types.Point(x=1426.011, y=421.129),\n", - " ]),\n", - ")\n", - "\n", - "polygon_prediction_ndjson = {\n", - " \"name\":\n", - " \"polygon\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"polygon\": [\n", - " {\n", - " \"x\": 1489.581,\n", - " \"y\": 183.934\n", - " },\n", - " {\n", - " \"x\": 2278.306,\n", - " \"y\": 256.885\n", - " },\n", - " {\n", - " \"x\": 2428.197,\n", - " \"y\": 200.437\n", - " },\n", - " {\n", - " \"x\": 2560.0,\n", - " \"y\": 335.419\n", - " },\n", - " {\n", - " \"x\": 2557.386,\n", - " \"y\": 503.165\n", - " },\n", - " {\n", - " \"x\": 2320.596,\n", - " \"y\": 503.103\n", - " },\n", - " {\n", - " \"x\": 2156.083,\n", - " \"y\": 628.943\n", - " },\n", - " {\n", - " \"x\": 2161.111,\n", - " \"y\": 785.519\n", - " },\n", - " {\n", - " \"x\": 2002.115,\n", - " \"y\": 894.647\n", - " },\n", - " {\n", - " \"x\": 1838.456,\n", - " \"y\": 877.874\n", - " },\n", - " {\n", - " \"x\": 1436.53,\n", - " \"y\": 874.636\n", - " },\n", - " {\n", - " \"x\": 1411.403,\n", - " \"y\": 758.579\n", - " },\n", - " {\n", - " \"x\": 1353.853,\n", - " \"y\": 751.74\n", - " },\n", - " {\n", - " \"x\": 1345.264,\n", - " \"y\": 453.461\n", - " },\n", - " {\n", - " \"x\": 1426.011,\n", - 
" \"y\": 421.129\n", - " },\n", - " {\n", - " \"x\": 1489.581,\n", - " \"y\": 183.934\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Free-form text" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\",\n value=lb_types.Text(\n answer=\"sample text\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"confidence\": 0.5,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\",\n", - " value=lb_types.Text(\n", - " answer=\"sample text\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " 
},\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "text_annotation_ndjson = {\n", - " \"name\": \"free_text\",\n", - " \"answer\": \"sample text\",\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"confidence\": 0.5,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Point" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Annotation\npoint_prediction = lb_types.ObjectAnnotation(\n name=\"point\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\npoint_prediction_ndjson = {\n \"name\": \"point\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [],\n \"point\": 
{\n \"x\": 1166.606,\n \"y\": 1441.768\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Annotation\n", - "point_prediction = lb_types.ObjectAnnotation(\n", - " name=\"point\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " value=lb_types.Point(x=1166.606, y=1441.768),\n", - ")\n", - "\n", - "point_prediction_ndjson = {\n", - " \"name\": \"point\",\n", - " \"confidence\": 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"classifications\": [],\n", - " \"point\": {\n", - " \"x\": 1166.606,\n", - " \"y\": 1441.768\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Polyline" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "polyline_prediction = lb_types.ObjectAnnotation(\n name=\"polyline\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n 
\"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)\n\npolyline_prediction_ndjson = {\n \"name\":\n \"polyline\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [],\n \"line\": [\n {\n \"x\": 2534.353,\n \"y\": 249.471\n },\n {\n \"x\": 2429.492,\n \"y\": 182.092\n },\n {\n \"x\": 2294.322,\n \"y\": 221.962\n },\n {\n \"x\": 2224.491,\n \"y\": 180.463\n },\n {\n \"x\": 2136.123,\n \"y\": 204.716\n },\n {\n \"x\": 1712.247,\n \"y\": 173.949\n },\n {\n \"x\": 1703.838,\n \"y\": 84.438\n },\n {\n \"x\": 1579.772,\n \"y\": 82.61\n },\n {\n \"x\": 1583.442,\n \"y\": 167.552\n },\n {\n \"x\": 
1478.869,\n \"y\": 164.903\n },\n {\n \"x\": 1418.941,\n \"y\": 318.149\n },\n {\n \"x\": 1243.128,\n \"y\": 400.815\n },\n {\n \"x\": 1022.067,\n \"y\": 319.007\n },\n {\n \"x\": 892.367,\n \"y\": 379.216\n },\n {\n \"x\": 670.273,\n \"y\": 364.408\n },\n {\n \"x\": 613.114,\n \"y\": 288.16\n },\n {\n \"x\": 377.559,\n \"y\": 238.251\n },\n {\n \"x\": 368.087,\n \"y\": 185.064\n },\n {\n \"x\": 246.557,\n \"y\": 167.286\n },\n {\n \"x\": 236.648,\n \"y\": 285.61\n },\n {\n \"x\": 90.929,\n \"y\": 326.412\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "polyline_prediction = lb_types.ObjectAnnotation(\n", - " name=\"polyline\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " value=lb_types.Line(points=[\n", - " lb_types.Point(x=2534.353, y=249.471),\n", - " lb_types.Point(x=2429.492, y=182.092),\n", - " lb_types.Point(x=2294.322, y=221.962),\n", - " lb_types.Point(x=2224.491, y=180.463),\n", - " lb_types.Point(x=2136.123, y=204.716),\n", - " lb_types.Point(x=1712.247, y=173.949),\n", - " lb_types.Point(x=1703.838, y=84.438),\n", - " lb_types.Point(x=1579.772, y=82.61),\n", - " lb_types.Point(x=1583.442, y=167.552),\n", - " lb_types.Point(x=1478.869, y=164.903),\n", - " lb_types.Point(x=1418.941, y=318.149),\n", - " lb_types.Point(x=1243.128, y=400.815),\n", - " lb_types.Point(x=1022.067, y=319.007),\n", - " lb_types.Point(x=892.367, y=379.216),\n", - " lb_types.Point(x=670.273, y=364.408),\n", - " lb_types.Point(x=613.114, y=288.16),\n", - " 
lb_types.Point(x=377.559, y=238.251),\n", - " lb_types.Point(x=368.087, y=185.064),\n", - " lb_types.Point(x=246.557, y=167.286),\n", - " lb_types.Point(x=236.648, y=285.61),\n", - " lb_types.Point(x=90.929, y=326.412),\n", - " ]),\n", - ")\n", - "\n", - "polyline_prediction_ndjson = {\n", - " \"name\":\n", - " \"polyline\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"classifications\": [],\n", - " \"line\": [\n", - " {\n", - " \"x\": 2534.353,\n", - " \"y\": 249.471\n", - " },\n", - " {\n", - " \"x\": 2429.492,\n", - " \"y\": 182.092\n", - " },\n", - " {\n", - " \"x\": 2294.322,\n", - " \"y\": 221.962\n", - " },\n", - " {\n", - " \"x\": 2224.491,\n", - " \"y\": 180.463\n", - " },\n", - " {\n", - " \"x\": 2136.123,\n", - " \"y\": 204.716\n", - " },\n", - " {\n", - " \"x\": 1712.247,\n", - " \"y\": 173.949\n", - " },\n", - " {\n", - " \"x\": 1703.838,\n", - " \"y\": 84.438\n", - " },\n", - " {\n", - " \"x\": 1579.772,\n", - " \"y\": 82.61\n", - " },\n", - " {\n", - " \"x\": 1583.442,\n", - " \"y\": 167.552\n", - " },\n", - " {\n", - " \"x\": 1478.869,\n", - " \"y\": 164.903\n", - " },\n", - " {\n", - " \"x\": 1418.941,\n", - " \"y\": 318.149\n", - " },\n", - " {\n", - " \"x\": 1243.128,\n", - " \"y\": 400.815\n", - " },\n", - " {\n", - " \"x\": 1022.067,\n", - " \"y\": 319.007\n", - " },\n", - " {\n", - " \"x\": 892.367,\n", - " \"y\": 379.216\n", - " },\n", - " {\n", - " \"x\": 670.273,\n", - " \"y\": 364.408\n", - " },\n", - " {\n", - " \"x\": 613.114,\n", - 
" \"y\": 288.16\n", - " },\n", - " {\n", - " \"x\": 377.559,\n", - " \"y\": 238.251\n", - " },\n", - " {\n", - " \"x\": 368.087,\n", - " \"y\": 185.064\n", - " },\n", - " {\n", - " \"x\": 246.557,\n", - " \"y\": 167.286\n", - " },\n", - " {\n", - " \"x\": 236.648,\n", - " \"y\": 285.61\n", - " },\n", - " {\n", - " \"x\": 90.929,\n", - " \"y\": 326.412\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# send a sample image as batch to the project\nglobal_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\" + str(uuid.uuid4())\ntest_img_urls = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"Custom metrics demo\",\n iam_integration=None)\ntask = dataset.create_data_rows([test_img_urls])\n\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# send a sample image as batch to the project\n", - "global_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\" + str(uuid.uuid4())\n", - "test_img_urls = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"Custom metrics demo\",\n", - " 
iam_integration=None)\n", - "task = dataset.create_data_rows([test_img_urls])\n", - "\n", - "print(f\"Failed data rows: {task.failed_data_rows}\")\n", - "print(f\"Errors: {task.errors}\")\n", - "\n", - "if task.errors:\n", - " for error in task.errors:\n", - " if (\"Duplicate global key\" in error[\"message\"] and\n", - " dataset.row_count == 0):\n", - " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", - " print(f\"Deleting empty dataset: {dataset}\")\n", - " dataset.delete()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n 
options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of tools\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Image Prediction Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\"),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " 
lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " tools=[ # List of tools\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", - " ),\n", - " ],\n", - " ),\n", - " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n", - " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n", - " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Image Prediction Import Demo\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Image,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# create Model\nmodel = client.create_model(\n name=\"model_with_aggregated_custom_metrics\" + str(uuid.uuid4()),\n ontology_id=ontology.uid,\n)\n# create Model Run\nmodel_run = 
model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create Model\n", - "model = client.create_model(\n", - " name=\"model_with_aggregated_custom_metrics\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid,\n", - ")\n", - "# create Model Run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", @@ -1565,376 +275,150 @@ "Create the prediction payload using the snippets of code in ***Supported Predictions*** section.\n", "\n", "The resulting label_ndjson should have exactly the same content for predictions that are supported by both (with exception of the uuid strings that are generated)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label for predictions\nlabel_prediction = []\nlabel_prediction.append(\n lb_types.Label(\n data=lb_types.ImageData(global_key=global_key),\n annotations=[\n radio_prediction,\n nested_radio_prediction,\n checklist_prediction,\n nested_checklist_prediction,\n bbox_prediction,\n bbox_with_radio_subclass_prediction,\n polyline_prediction,\n polygon_prediction,\n point_prediction,\n text_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label for predictions\n", - "label_prediction = []\n", - "label_prediction.append(\n", - " lb_types.Label(\n", - " data=lb_types.ImageData(global_key=global_key),\n", - " 
annotations=[\n", - " radio_prediction,\n", - " nested_radio_prediction,\n", - " checklist_prediction,\n", - " nested_checklist_prediction,\n", - " bbox_prediction,\n", - " bbox_with_radio_subclass_prediction,\n", - " polyline_prediction,\n", - " polygon_prediction,\n", - " point_prediction,\n", - " text_annotation,\n", - " ],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_prediction_ndjson = []\n\nfor annot in [\n radio_prediction_ndjson,\n checklist_prediction_ndjson,\n bbox_prediction_ndjson,\n bbox_with_radio_subclass_prediction_ndjson,\n polygon_prediction_ndjson,\n point_prediction_ndjson,\n polyline_prediction_ndjson,\n text_annotation_ndjson,\n nested_radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n]:\n annot.update({\"dataRow\": {\"globalKey\": global_key}})\n label_prediction_ndjson.append(annot)", + "cell_type": "code", "outputs": [], - "source": [ - "label_prediction_ndjson = []\n", - "\n", - "for annot in [\n", - " radio_prediction_ndjson,\n", - " checklist_prediction_ndjson,\n", - " bbox_prediction_ndjson,\n", - " bbox_with_radio_subclass_prediction_ndjson,\n", - " polygon_prediction_ndjson,\n", - " point_prediction_ndjson,\n", - " polyline_prediction_ndjson,\n", - " text_annotation_ndjson,\n", - " nested_radio_prediction_ndjson,\n", - " nested_checklist_prediction_ndjson,\n", - "]:\n", - " annot.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_prediction_ndjson.append(annot)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. 
Upload the predictions payload to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for prediction uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_prediction,\n", - ")\n", - "\n", - "# Errors will appear for prediction uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to a model run\n", "To visualize both annotations and predictions in the model run we will create a project with ground truth annotations.\n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. 
Create a labelbox project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"image_prediction_many_kinds\",\n media_type=lb.MediaType.Image)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Labelbox project\n", - "project = client.create_project(name=\"image_prediction_many_kinds\",\n", - " media_type=lb.MediaType.Image)\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. Create a batch to send to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"batch_predictions_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"batch_predictions_demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########### Annotations ###########\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n 
name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n)\n\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon\",\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n 
lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point\",\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\npolyline_annotation = lb_types.ObjectAnnotation(\n name=\"polyline\",\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "########### Annotations ###########\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\")),\n", - 
")\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", - " end=lb_types.Point(x=1915,\n", - " y=1307), # x= left + width , y = top + height\n", - " ),\n", - ")\n", - "\n", - "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", - " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", - " ),\n", - " classifications=[\n", - " 
lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\", confidence=0.5)),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "polygon_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polygon\",\n", - " value=lb_types.Polygon(points=[\n", - " lb_types.Point(x=1489.581, y=183.934),\n", - " lb_types.Point(x=2278.306, y=256.885),\n", - " lb_types.Point(x=2428.197, y=200.437),\n", - " lb_types.Point(x=2560.0, y=335.419),\n", - " lb_types.Point(x=2557.386, y=503.165),\n", - " lb_types.Point(x=2320.596, y=503.103),\n", - " lb_types.Point(x=2156.083, y=628.943),\n", - " lb_types.Point(x=2161.111, y=785.519),\n", - " lb_types.Point(x=2002.115, y=894.647),\n", - " lb_types.Point(x=1838.456, y=877.874),\n", - " lb_types.Point(x=1436.53, y=874.636),\n", - " lb_types.Point(x=1411.403, y=758.579),\n", - " lb_types.Point(x=1353.853, y=751.74),\n", - " lb_types.Point(x=1345.264, y=453.461),\n", - " lb_types.Point(x=1426.011, y=421.129),\n", - " ]),\n", - ")\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", - "\n", - "point_annotation = lb_types.ObjectAnnotation(\n", - " name=\"point\",\n", - " value=lb_types.Point(x=1166.606, y=1441.768),\n", - ")\n", - "\n", - "polyline_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polyline\",\n", - " value=lb_types.Line(points=[\n", - " lb_types.Point(x=2534.353, y=249.471),\n", - " lb_types.Point(x=2429.492, y=182.092),\n", - " lb_types.Point(x=2294.322, y=221.962),\n", - " lb_types.Point(x=2224.491, y=180.463),\n", - " lb_types.Point(x=2136.123, y=204.716),\n", - " lb_types.Point(x=1712.247, y=173.949),\n", - " lb_types.Point(x=1703.838, y=84.438),\n", - " lb_types.Point(x=1579.772, y=82.61),\n", - " lb_types.Point(x=1583.442, y=167.552),\n", - " lb_types.Point(x=1478.869, y=164.903),\n", - " lb_types.Point(x=1418.941, 
y=318.149),\n", - " lb_types.Point(x=1243.128, y=400.815),\n", - " lb_types.Point(x=1022.067, y=319.007),\n", - " lb_types.Point(x=892.367, y=379.216),\n", - " lb_types.Point(x=670.273, y=364.408),\n", - " lb_types.Point(x=613.114, y=288.16),\n", - " lb_types.Point(x=377.559, y=238.251),\n", - " lb_types.Point(x=368.087, y=185.064),\n", - " lb_types.Point(x=246.557, y=167.286),\n", - " lb_types.Point(x=236.648, y=285.61),\n", - " lb_types.Point(x=90.929, y=326.412),\n", - " ]),\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. Create the label object" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\nlabel = []\nannotations = [\n radio_annotation,\n nested_radio_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n polygon_annotation,\n point_annotation,\n polyline_annotation,\n]\nlabel.append(\n lb_types.Label(data=lb_types.ImageData(global_key=global_key),\n annotations=annotations))", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", - "label = []\n", - "annotations = [\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " checklist_annotation,\n", - " nested_checklist_annotation,\n", - " text_annotation,\n", - " bbox_annotation,\n", - " bbox_with_radio_subclass_annotation,\n", - " polygon_annotation,\n", - " point_annotation,\n", - " polyline_annotation,\n", - "]\n", - "label.append(\n", - " lb_types.Label(data=lb_types.ImageData(global_key=global_key),\n", - " annotations=annotations))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. 
Upload annotations to the project using Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"annotation_import_\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_annotation = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"annotation_import_\" + str(uuid.uuid4()),\n", - " labels=label,\n", - ")\n", - "\n", - "upload_job_annotation.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_annotation.errors)\n", - "print(\"Status of uploads: \", upload_job_annotation.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6 Send the annotations to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_labels(project_id=project.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_labels(project_id=project.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - 
"nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/examples/model_experiments/model_predictions_to_project.ipynb b/examples/model_experiments/model_predictions_to_project.ipynb index fd52ff5f8..ee86ff1b2 100644 --- a/examples/model_experiments/model_predictions_to_project.ipynb +++ b/examples/model_experiments/model_predictions_to_project.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 2, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,60 +24,47 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Import Model Run Predictions to a Project\n", "Throughout the process of training your machine learning (ML) model, you may want to export your model-run predictions and import them to your new project. In this notebook, we will demonstrate the process on how to get those predictions moved over." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport labelbox.types as lb_types\nfrom labelbox.schema.conflict_resolution_strategy import (\n ConflictResolutionStrategy,)\nimport uuid", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "from labelbox.schema.conflict_resolution_strategy import (\n", - " ConflictResolutionStrategy,)\n", - "import uuid" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## API Key and Client\n", "See the developer guide for [creating an API key](https://docs.labelbox.com/reference/create-api-key)." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your API key\nAPI_KEY = \"\"\n# To get your API key go to: Workspace settings -> API -> Create API Key\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your API key\n", - "API_KEY = \"\"\n", - "# To get your API key go to: Workspace settings -> API -> Create API Key\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Creating Model Experiment\n", @@ -83,245 +72,148 @@ "In order to interact with Model Run predictions, you must create a Model Experiment with a Model Run and then add predictions. The steps below go over this process. See [Model](https://docs.labelbox.com/reference/model) from our developer guides for more information.\n", "\n", "To create a Model Experiment you will need to create an ontology. 
See [Ontology](https://docs.labelbox.com/reference/ontology) for more information" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Ontology" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "In this example we are making a simple ontology with a classification feature. The classification feature has two options: option 1 and option 2." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "classification_features = [\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"Demo Feature\",\n options=[lb.Option(value=\"option 1\"),\n lb.Option(value=\"option 2\")],\n )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n classifications=classification_features)\n\nontology = client.create_ontology(\"Demo Ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image)", + "cell_type": "code", "outputs": [], - "source": [ - "classification_features = [\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"Demo Feature\",\n", - " options=[lb.Option(value=\"option 1\"),\n", - " lb.Option(value=\"option 2\")],\n", - " )\n", - "]\n", - "\n", - "ontology_builder = lb.OntologyBuilder(tools=[],\n", - " classifications=classification_features)\n", - "\n", - "ontology = client.create_ontology(\"Demo Ontology\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Image)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Experiment" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model = client.create_model(name=f\"Model Experiment Demo {str(uuid.uuid4())}\",\n ontology_id=ontology.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "model = client.create_model(name=f\"Model Experiment Demo 
{str(uuid.uuid4())}\",\n", - " ontology_id=ontology.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Creating a Model Run from Model Experiment\n", "\n", "On this step we will need to create a dataset to attach data rows to our model run. See [Dataset](https://docs.labelbox.com/reference/dataset) for more information." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Dataset and Data Rows" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# send a sample image as data row for a dataset\nglobal_key = \"2560px-Kitano_Street_Kobe01s5s4110\" + str(uuid.uuid4())\n\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"foundry-demo-dataset\")\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\n\nprint(f\"Errors: {task.errors}\")\nprint(f\"Failed data rows: {task.failed_data_rows}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# send a sample image as data row for a dataset\n", - "global_key = \"2560px-Kitano_Street_Kobe01s5s4110\" + str(uuid.uuid4())\n", - "\n", - "test_img_url = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"foundry-demo-dataset\")\n", - "task = 
dataset.create_data_rows([test_img_url])\n", - "task.wait_till_done()\n", - "\n", - "print(f\"Errors: {task.errors}\")\n", - "print(f\"Failed data rows: {task.failed_data_rows}\")\n", - "\n", - "if task.errors:\n", - " for error in task.errors:\n", - " if (\"Duplicate global key\" in error[\"message\"] and\n", - " dataset.row_count == 0):\n", - " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", - " print(f\"Deleting empty dataset: {dataset}\")\n", - " dataset.delete()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Create Model Run and Attach Data Rows" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run_name = f\"Model Run Demo {str(uuid.uuid4())}\"\n\nmodel_run = model.create_model_run(name=model_run_name)", + "cell_type": "code", "outputs": [], - "source": [ - "model_run_name = f\"Model Run Demo {str(uuid.uuid4())}\"\n", - "\n", - "model_run = model.create_model_run(name=model_run_name)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Add Predictions\n", "In the below code snippet we are adding a sample predictions and attaching them to our data row inside our model run." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"Demo Feature\",\n value=lb_types.Checklist(\n answer=[lb_types.ClassificationAnswer(name=\"option 1\", confidence=0.5)\n ]),\n)\n\n# Create prediction label\nlabel_prediction = [\n lb_types.Label(\n data=lb_types.ImageData(global_key=global_key),\n annotations=[checklist_prediction],\n )\n]\n\n# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for prediction uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"Demo Feature\",\n", - " value=lb_types.Checklist(\n", - " answer=[lb_types.ClassificationAnswer(name=\"option 1\", confidence=0.5)\n", - " ]),\n", - ")\n", - "\n", - "# Create prediction label\n", - "label_prediction = [\n", - " lb_types.Label(\n", - " data=lb_types.ImageData(global_key=global_key),\n", - " annotations=[checklist_prediction],\n", - " )\n", - "]\n", - "\n", - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_prediction,\n", - ")\n", - "\n", - "# Errors will appear for prediction uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Setup Project and Add Predictions\n", "In the steps below we will be creating our target project and setting up the project with the ontology 
we used with our model run. See [Project](https://docs.labelbox.com/reference/dataset) for more information." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a new project\nproject = client.create_project(name=\"Model Run Import Demo Project\",\n media_type=lb.MediaType.Image)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a new project\n", - "project = client.create_project(name=\"Model Run Import Demo Project\",\n", - " media_type=lb.MediaType.Image)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Setup Ontology\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Setup Ontology\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Ontology Mapping\n", "To send prediction to your annotate project you will need to provide a ontology mapping python dictionary item. This matches ontology feature id to another. You would use this if your ontology was different from your model run to your project. In our case, since we are using the same ontology, you would just need to map the same feature id to each other." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Get ontology dictionary to obtain featureSchemaIds\nontology_normalized = ontology.normalized\n\nPREDICTIONS_ONTOLOGY_MAPPING = {\n ontology_normalized[\"classifications\"][0][\"featureSchemaId\"]:\n ontology_normalized[\"classifications\"][0]\n [\"featureSchemaId\"], # Classification featureSchemaID\n ontology_normalized[\"classifications\"][0][\"options\"][0][\"featureSchemaId\"]:\n ontology_normalized[\"classifications\"][0][\"options\"][0]\n [\"featureSchemaId\"], # Different Classification Answer featureSchemaIDs\n ontology_normalized[\"classifications\"][0][\"options\"][1][\"featureSchemaId\"]:\n ontology_normalized[\"classifications\"][0][\"options\"][1]\n [\"featureSchemaId\"],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Get ontology dictionary to obtain featureSchemaIds\n", - "ontology_normalized = ontology.normalized\n", - "\n", - "PREDICTIONS_ONTOLOGY_MAPPING = {\n", - " ontology_normalized[\"classifications\"][0][\"featureSchemaId\"]:\n", - " ontology_normalized[\"classifications\"][0]\n", - " [\"featureSchemaId\"], # Classification featureSchemaID\n", - " ontology_normalized[\"classifications\"][0][\"options\"][0][\"featureSchemaId\"]:\n", - " ontology_normalized[\"classifications\"][0][\"options\"][0]\n", - " [\"featureSchemaId\"], # Different Classification Answer featureSchemaIDs\n", - " ontology_normalized[\"classifications\"][0][\"options\"][1][\"featureSchemaId\"]:\n", - " ontology_normalized[\"classifications\"][0][\"options\"][1]\n", - " [\"featureSchemaId\"],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Send Model Predictions from Model Run to Annotate\n", @@ -343,79 +235,36 @@ " * ConflictResolutionStrategy.OverrideWithAnnotations\n", "* `param batch_priority`\n", " - The priority of the batch.\n" - ] + ], + "cell_type": "markdown" }, { - 
"cell_type": "markdown", "metadata": {}, "source": [ "#### Import Predictions as pre-labels" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "send_to_annotations_params = {\n \"predictions_ontology_mapping\":\n PREDICTIONS_ONTOLOGY_MAPPING,\n \"exclude_data_rows_in_project\":\n False,\n \"override_existing_annotations_rule\":\n ConflictResolutionStrategy.OverrideWithPredictions,\n \"batch_priority\":\n 5,\n}\n\n# Send the predictions as pre-labels\nqueue_id = [\n queue.uid\n for queue in project.task_queues()\n if queue.queue_type == \"INITIAL_LABELING_QUEUE\"\n][0]\n\ntask = model_run.send_to_annotate_from_model(\n destination_project_id=project.uid,\n task_queue_id=\n queue_id, # ID of workflow task, set ID to None if you want to convert pre-labels to ground truths .\n batch_name=\"Prediction Import Demo Batch\",\n data_rows=lb.GlobalKeys(\n [global_key] # Provide a list of global keys from foundry app task\n ),\n params=send_to_annotations_params,\n)\n\ntask.wait_till_done()\n\nprint(f\"Errors: {task.errors}\")", + "cell_type": "code", "outputs": [], - "source": [ - "send_to_annotations_params = {\n", - " \"predictions_ontology_mapping\":\n", - " PREDICTIONS_ONTOLOGY_MAPPING,\n", - " \"exclude_data_rows_in_project\":\n", - " False,\n", - " \"override_existing_annotations_rule\":\n", - " ConflictResolutionStrategy.OverrideWithPredictions,\n", - " \"batch_priority\":\n", - " 5,\n", - "}\n", - "\n", - "# Send the predictions as pre-labels\n", - "queue_id = [\n", - " queue.uid\n", - " for queue in project.task_queues()\n", - " if queue.queue_type == \"INITIAL_LABELING_QUEUE\"\n", - "][0]\n", - "\n", - "task = model_run.send_to_annotate_from_model(\n", - " destination_project_id=project.uid,\n", - " task_queue_id=\n", - " queue_id, # ID of workflow task, set ID to None if you want to convert pre-labels to ground truths .\n", - " batch_name=\"Prediction Import Demo Batch\",\n", - " 
data_rows=lb.GlobalKeys(\n", - " [global_key] # Provide a list of global keys from foundry app task\n", - " ),\n", - " params=send_to_annotations_params,\n", - ")\n", - "\n", - "task.wait_till_done()\n", - "\n", - "print(f\"Errors: {task.errors}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Cleanup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()\n# model_run.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()\n", - "# model_run.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + ] +} \ No newline at end of file diff --git a/examples/prediction_upload/conversational_LLM_predictions.ipynb b/examples/prediction_upload/conversational_LLM_predictions.ipynb index 878a16649..7d0b889ad 100644 --- a/examples/prediction_upload/conversational_LLM_predictions.ipynb +++ b/examples/prediction_upload/conversational_LLM_predictions.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,295 +24,138 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# LLM pairwise comparison with Conversational text using Model\n", "\n", "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis in the model product.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - 
"source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Set up" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "import uuid" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API Key" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Supported annotations for conversational text" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Entity" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ner_prediction = lb_types.ObjectAnnotation(\n name=\"ner\",\n confidence=0.5,\n value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n)\n\nner_prediction_ndjson = {\n \"name\": \"ner\",\n \"confidence\": 0.5,\n \"location\": {\n \"start\": 0,\n \"end\": 8\n },\n \"messageId\": \"message-1\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "ner_prediction = lb_types.ObjectAnnotation(\n", - " name=\"ner\",\n", - " confidence=0.5,\n", - " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n", - ")\n", - "\n", - "ner_prediction_ndjson = {\n", - " \"name\": \"ner\",\n", - " \"confidence\": 0.5,\n", - " \"location\": {\n", - " \"start\": 0,\n", - " \"end\": 8\n", - " },\n", - " 
\"messageId\": \"message-1\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Radio (single-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "radio_prediction = lb_types.ClassificationAnnotation(\n name=\"Choose the best response\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(name=\"Response B\",\n confidence=0.5)),\n)\n\nradio_prediction_ndjson = {\n \"name\": \"Choose the best response\",\n \"answer\": {\n \"name\": \"Response B\",\n \"confidence\": 0.5\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"Choose the best response\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(name=\"Response B\",\n", - " confidence=0.5)),\n", - ")\n", - "\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"Choose the best response\",\n", - " \"answer\": {\n", - " \"name\": \"Response B\",\n", - " \"confidence\": 0.5\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Free-form text" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "text_prediction = lb_types.ClassificationAnnotation(\n name=\"Provide a reason for your choice\",\n value=lb_types.Text(answer=\"the answer to the text questions right here\",\n confidence=0.5),\n)\n\ntext_prediction_ndjson = {\n \"name\": \"Provide a reason for your choice\",\n \"answer\": \"This is the more concise answer\",\n \"confidence\": 0.5,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "text_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"Provide a reason for your choice\",\n", - " value=lb_types.Text(answer=\"the answer to the text questions right here\",\n", - " confidence=0.5),\n", - ")\n", - "\n", 
- "text_prediction_ndjson = {\n", - " \"name\": \"Provide a reason for your choice\",\n", - " \"answer\": \"This is the more concise answer\",\n", - " \"confidence\": 0.5,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Checklist (multi-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n message_id=\"message-1\", # Message specific annotation\n)\n\nchecklist_prediction_ndjson = {\n \"name\": \"checklist_convo\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n \"messageId\": \"message-1\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_convo\", # must match your ontology feature\"s name\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5),\n", - " ]),\n", - " message_id=\"message-1\", # Message specific annotation\n", - ")\n", - "\n", - "checklist_prediction_ndjson = {\n", - " \"name\": \"checklist_convo\",\n", - " \"answers\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " ],\n", - " \"messageId\": \"message-1\",\n", - "}" - ] - }, - { - 
"cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "### Classification: Nested radio and checklist" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Message based\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"message-1\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )\n ]),\n )\n ],\n )\n ]),\n)\n# Message based\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"messageId\":\n \"message-1\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n },\n }],\n }],\n}\n# Global\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )),\n )\n ],\n )),\n)\n# Global\nnested_radio_prediction_ndjson = {\n \"name\": 
\"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Message based\n", - "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " message_id=\"message-1\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=\n", - " 0.5, # Confidence scores should be added to the answer\n", - " )\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "# Message based\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"messageId\":\n", - " \"message-1\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " },\n", - " }],\n", - " }],\n", - "}\n", - "# Global\n", - "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", 
- " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=\n", - " 0.5, # Confidence scores should be added to the answer\n", - " )),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "# Global\n", - "nested_radio_prediction_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " }],\n", - " },\n", - "}" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Step 1: Import data rows with \"modelOutputs\" into Catalog\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n", @@ -324,503 +169,218 @@ " }\n", "]\n", "```\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Example of row_data with model outputs" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "pairwise_shopping_2 = \"\"\"\n {\n \"type\": \"application/vnd.labelbox.conversational\",\n \"version\": 1,\n \"messages\": [\n {\n \"messageId\": \"message-0\",\n \"timestampUsec\": 1530718491,\n \"content\": \"Hi! 
How can I help?\",\n \"user\": {\n \"userId\": \"Bot 002\",\n \"name\": \"Bot\"\n },\n \"align\": \"left\",\n \"canLabel\": false\n },\n {\n \"messageId\": \"message-1\",\n \"timestampUsec\": 1530718503,\n \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n \"user\": {\n \"userId\": \"User 00686\",\n \"name\": \"User\"\n },\n \"align\": \"right\",\n \"canLabel\": true\n }\n\n ],\n \"modelOutputs\": [\n {\n \"title\": \"Response A\",\n \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n },\n {\n \"title\": \"Response B\",\n \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n }\n ]\n}\n\"\"\"", + "cell_type": "code", "outputs": [], - "source": [ - "pairwise_shopping_2 = \"\"\"\n", - " {\n", - " \"type\": \"application/vnd.labelbox.conversational\",\n", - " \"version\": 1,\n", - " \"messages\": [\n", - " {\n", - " \"messageId\": \"message-0\",\n", - " \"timestampUsec\": 1530718491,\n", - " \"content\": \"Hi! 
How can I help?\",\n", - " \"user\": {\n", - " \"userId\": \"Bot 002\",\n", - " \"name\": \"Bot\"\n", - " },\n", - " \"align\": \"left\",\n", - " \"canLabel\": false\n", - " },\n", - " {\n", - " \"messageId\": \"message-1\",\n", - " \"timestampUsec\": 1530718503,\n", - " \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n", - " \"user\": {\n", - " \"userId\": \"User 00686\",\n", - " \"name\": \"User\"\n", - " },\n", - " \"align\": \"right\",\n", - " \"canLabel\": true\n", - " }\n", - "\n", - " ],\n", - " \"modelOutputs\": [\n", - " {\n", - " \"title\": \"Response A\",\n", - " \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n", - " \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n", - " },\n", - " {\n", - " \"title\": \"Response B\",\n", - " \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. 
Your satisfaction is important to us!\",\n", - " \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n", - " }\n", - " ]\n", - "}\n", - "\"\"\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "global_key = \"pairwise_shooping_asset\" + str(uuid.uuid4())\nconvo_data = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n \"global_key\":\n global_key,\n}\n# Create a dataset\ndataset = client.create_dataset(name=\"pairwise_prediction_demo\")\n# Create a datarows\ntask = dataset.create_data_rows([convo_data])\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "global_key = \"pairwise_shooping_asset\" + str(uuid.uuid4())\n", - "convo_data = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "# Create a dataset\n", - "dataset = client.create_dataset(name=\"pairwise_prediction_demo\")\n", - "# Create a datarows\n", - "task = dataset.create_data_rows([convo_data])\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create an ontology with relevant classifications\n\nontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n scope=lb.Classification.Scope.GLOBAL,\n name=\"Choose the best response\",\n options=[\n lb.Option(value=\"Response 
A\"),\n lb.Option(value=\"Response B\"),\n lb.Option(value=\"Tie\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"Provide a reason for your choice\",\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n scope=lb.Classification.Scope.INDEX,\n name=\"checklist_convo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Pairwise comparison ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Conversational,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create an ontology with relevant classifications\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " tools=[\n", - " lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\"),\n", - " ],\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " name=\"Choose the best response\",\n", - " options=[\n", - " lb.Option(value=\"Response A\"),\n", - " lb.Option(value=\"Response B\"),\n", - " lb.Option(value=\"Tie\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " 
class_type=lb.Classification.Type.TEXT,\n", - " name=\"Provide a reason for your choice\",\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " scope=lb.Classification.Scope.INDEX,\n", - " name=\"checklist_convo\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " scope=lb.Classification.Scope.INDEX,\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Pairwise comparison ontology\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Conversational,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# create model\nmodel = client.create_model(name=\"Comparison_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create model run\nmodel_run = 
model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create model\n", - "model = client.create_model(name=\"Comparison_model_run_\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid)\n", - "# create model run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Step 5: Create the predictions payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_prediction = []\nlabel_prediction.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n ner_prediction,\n text_prediction,\n checklist_prediction,\n radio_prediction,\n nested_radio_prediction,\n nested_checklist_prediction,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label_prediction = []\n", - "label_prediction.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " ner_prediction,\n", - " text_prediction,\n", - " checklist_prediction,\n", - " radio_prediction,\n", - " nested_radio_prediction,\n", - " nested_checklist_prediction,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "Setup the payload with the annotations that were created in Step 1." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\nfor annotations in [\n ner_prediction_ndjson,\n text_prediction_ndjson,\n checklist_prediction_ndjson,\n radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n nested_radio_prediction_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "for annotations in [\n", - " ner_prediction_ndjson,\n", - " text_prediction_ndjson,\n", - " checklist_prediction_ndjson,\n", - " radio_prediction_ndjson,\n", - " nested_checklist_prediction_ndjson,\n", - " nested_radio_prediction_ndjson,\n", - "]:\n", - " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson.append(annotations)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Step 6: Upload the predictions payload to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_prediction,\n", - ")\n", - "\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" 
- ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.1 Create a labelbox project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project = client.create_project(\n name=\"Conversational Human Evaluation Demo\",\n media_type=lb.MediaType.Conversational,\n)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "project = client.create_project(\n", - " name=\"Conversational Human Evaluation Demo\",\n", - " media_type=lb.MediaType.Conversational,\n", - ")\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.2 Create a batch to send to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"batch_convo_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"batch_convo_prediction_demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.3 Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ner_annotation = lb_types.ObjectAnnotation(\n name=\"ner\",\n 
value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n)\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"Choose the best response\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"Response B\")),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"Provide a reason for your choice\",\n value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n message_id=\"message-1\", # Message specific annotation\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"message-1\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ner_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner\",\n", - " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"message-1\"),\n", - ")\n", - "\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"Choose the best 
response\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"Response B\")),\n", - ")\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"Provide a reason for your choice\",\n", - " value=lb_types.Text(answer=\"the answer to the text questions right here\"),\n", - ")\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_convo\", # must match your ontology feature\"s name\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - " message_id=\"message-1\", # Message specific annotation\n", - ")\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " message_id=\"message-1\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "7.4 Create the label object" - ] + ], + "cell_type": "markdown" }, { - 
"cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_annotation = []\nlabel_annotation.append(\n lb_types.Label(\n data=lb_types.ConversationData(global_key=global_key),\n annotations=[\n ner_annotation,\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_radio_annotation,\n nested_checklist_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label_annotation = []\n", - "label_annotation.append(\n", - " lb_types.Label(\n", - " data=lb_types.ConversationData(global_key=global_key),\n", - " annotations=[\n", - " ner_annotation,\n", - " text_annotation,\n", - " checklist_annotation,\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " nested_checklist_annotation,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "7.5 Upload annotations to the project using Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label_annotation,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_annotation = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"label_import_job\" + str(uuid.uuid4()),\n", - " labels=label_annotation,\n", - ")\n", - "\n", - "upload_job_annotation.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_annotation.errors)\n", - "print(\"Status of uploads: \", upload_job_annotation.statuses)" - ] + 
"execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.6 Send the annotations to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "# get the labels id from the project\n", - "model_run.upsert_labels(project_id=project.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Option deletions for cleanup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/examples/prediction_upload/conversational_predictions.ipynb b/examples/prediction_upload/conversational_predictions.ipynb index 0cf5ede85..1b6da1ffc 100644 --- a/examples/prediction_upload/conversational_predictions.ipynb +++ b/examples/prediction_upload/conversational_predictions.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 2, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Conversational Text Prediction Import\n", @@ -48,434 +50,151 @@ "* Relationships\n", "\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ] + ], + "cell_type": "markdown" }, { 
- "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport uuid\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import uuid\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API key" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########### Radio Classification ###########\n\n# Python annotation\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\", confidence=0.5)),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"second_radio_answer\",\n \"confidence\": 0.5\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########### Radio Classification ###########\n", - "\n", - "# Python annotation\n", - "radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\", confidence=0.5)),\n", 
- ")\n", - "\n", - "# NDJSON\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"second_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# message based classifications\nner_prediction = lb_types.ObjectAnnotation(\n name=\"ner\",\n confidence=0.5,\n value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n)\n\nner_prediction_ndjson = {\n \"name\": \"ner\",\n \"confidence\": 0.5,\n \"location\": {\n \"start\": 0,\n \"end\": 8\n },\n \"messageId\": \"4\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# message based classifications\n", - "ner_prediction = lb_types.ObjectAnnotation(\n", - " name=\"ner\",\n", - " confidence=0.5,\n", - " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n", - ")\n", - "\n", - "ner_prediction_ndjson = {\n", - " \"name\": \"ner\",\n", - " \"confidence\": 0.5,\n", - " \"location\": {\n", - " \"start\": 0,\n", - " \"end\": 8\n", - " },\n", - " \"messageId\": \"4\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "##### Classification free text #####\n# Confidence scores are not supported for text predictions\n\ntext_prediction = lb_types.ClassificationAnnotation(\n name=\"text_convo\",\n value=lb_types.Text(\n answer=\"the answer to the text questions are right here\"),\n message_id=\"0\",\n)\n\ntext_prediction_ndjson = {\n \"name\": \"text_convo\",\n \"answer\": \"the answer to the text questions are right here\",\n \"messageId\": \"0\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "##### Classification free text #####\n", - "# Confidence scores are not supported for text predictions\n", - "\n", - "text_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"text_convo\",\n", - " value=lb_types.Text(\n", - " 
answer=\"the answer to the text questions are right here\"),\n", - " message_id=\"0\",\n", - ")\n", - "\n", - "text_prediction_ndjson = {\n", - " \"name\": \"text_convo\",\n", - " \"answer\": \"the answer to the text questions are right here\",\n", - " \"messageId\": \"0\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "##### Checklist Classification #######\n\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n message_id=\"2\",\n)\n\nchecklist_prediction_ndjson = {\n \"name\": \"checklist_convo\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n \"messageId\": \"2\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "##### Checklist Classification #######\n", - "\n", - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_convo\", # must match your ontology feature\"s name\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5),\n", - " ]),\n", - " message_id=\"2\",\n", - ")\n", - "\n", - "checklist_prediction_ndjson = {\n", - " \"name\": \"checklist_convo\",\n", - " \"answers\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " ],\n", - " \"messageId\": \"2\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": 
"code", - "execution_count": null, "metadata": {}, + "source": "######## Radio Classification ######\n\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_convo\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n message_id=\"0\",\n)\n\nradio_prediction_ndjson = {\n \"name\": \"radio_convo\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n \"messageId\": \"0\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######## Radio Classification ######\n", - "\n", - "radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"radio_convo\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\", confidence=0.5)),\n", - " message_id=\"0\",\n", - ")\n", - "\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"radio_convo\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " \"messageId\": \"0\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# ############ global nested classifications ###########\n\n# Message based\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"10\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )\n ]),\n )\n ],\n )\n ]),\n)\n# Message based\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"messageId\":\n \"10\",\n \"answer\": [{\n 
\"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n },\n }],\n }],\n}\n# Global\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )),\n )\n ],\n )),\n)\n# Global\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# ############ global nested classifications ###########\n", - "\n", - "# Message based\n", - "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " message_id=\"10\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=\n", - " 0.5, # Confidence scores should be added to the 
answer\n", - " )\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "# Message based\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"messageId\":\n", - " \"10\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " },\n", - " }],\n", - " }],\n", - "}\n", - "# Global\n", - "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=\n", - " 0.5, # Confidence scores should be added to the answer\n", - " )),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "# Global\n", - "nested_radio_prediction_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " }],\n", - " },\n", - "}" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - 
"execution_count": null, "metadata": {}, + "source": "# Create one Labelbox dataset\n\nglobal_key = \"conversation-1.json\" + str(uuid.uuid4())\n\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(\n name=\"conversational_annotation_import_demo_dataset\")\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create one Labelbox dataset\n", - "\n", - "global_key = \"conversation-1.json\" + str(uuid.uuid4())\n", - "\n", - "asset = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(\n", - " name=\"conversational_annotation_import_demo_dataset\")\n", - "task = dataset.create_data_rows([asset])\n", - "task.wait_till_done()\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows: \", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(\n tools=[lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\")],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n scope=lb.Classification.Scope.INDEX,\n name=\"text_convo\",\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n scope=lb.Classification.Scope.INDEX,\n name=\"checklist_convo\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_convo\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\"Ontology Conversation Annotations\",\n ontology_builder.asdict())", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(\n", - " tools=[lb.Tool(tool=lb.Tool.Type.NER, name=\"ner\")],\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.TEXT,\n", - " 
scope=lb.Classification.Scope.INDEX,\n", - " name=\"text_convo\",\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " scope=lb.Classification.Scope.INDEX,\n", - " name=\"checklist_convo\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_convo\",\n", - " scope=lb.Classification.Scope.INDEX,\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " scope=lb.Classification.Scope.INDEX,\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\"Ontology Conversation Annotations\",\n", - " ontology_builder.asdict())" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Mode and Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": 
null, "metadata": {}, + "source": "# create Model\nmodel = client.create_model(\n name=\"Conversational_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid,\n)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create Model\n", - "model = client.create_model(\n", - " name=\"Conversational_model_run_\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid,\n", - ")\n", - "# create Model Run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the predictions payload\n", @@ -484,304 +203,155 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. 
Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting payload should have exactly the same content for annotations that are supported by both" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Python annotations" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_prediction = []\nlabel_prediction.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n ner_prediction,\n checklist_prediction,\n text_prediction,\n radio_prediction,\n nested_checklist_prediction,\n nested_radio_prediction,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label_prediction = []\n", - "label_prediction.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " ner_prediction,\n", - " checklist_prediction,\n", - " text_prediction,\n", - " radio_prediction,\n", - " nested_checklist_prediction,\n", - " nested_radio_prediction,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "if using NDJSON : " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_prediction_ndjson = []\nfor annotations in [\n ner_prediction_ndjson,\n text_prediction_ndjson,\n checklist_prediction_ndjson,\n radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n nested_radio_prediction_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_prediction_ndjson.append(annotations)", + "cell_type": "code", "outputs": [], - "source": [ - "label_prediction_ndjson = []\n", - "for annotations in [\n", - " ner_prediction_ndjson,\n", - " text_prediction_ndjson,\n", - " checklist_prediction_ndjson,\n", - " radio_prediction_ndjson,\n", - " nested_checklist_prediction_ndjson,\n", - " 
nested_radio_prediction_ndjson,\n", - "]:\n", - " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_prediction_ndjson.append(annotations)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Step 6: Upload the predictions payload to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_prediction,\n", - ")\n", - "\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7 : Send annotations to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.1 Create a labelbox project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project = client.create_project(\n name=\"Conversational Text Prediction Import Demo\",\n media_type=lb.MediaType.Conversational,\n)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "project = client.create_project(\n", - " name=\"Conversational Text Prediction Import Demo\",\n", - " 
media_type=lb.MediaType.Conversational,\n", - ")\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.2 Create a batch to send to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"batch_convo_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"batch_convo_prediction_demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.3 Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ner_annotation = lb_types.ObjectAnnotation(\n name=\"ner\",\n value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"text_convo\",\n value=lb_types.Text(\n answer=\"the answer to the text questions are right here\"),\n message_id=\"0\",\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_convo\", # must match your ontology feature\"s name\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n message_id=\"2\",\n)\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_convo\",\n 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n message_id=\"0\",\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n message_id=\"10\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ner_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner\",\n", - " value=lb_types.ConversationEntity(start=0, end=8, message_id=\"4\"),\n", - ")\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"text_convo\",\n", - " value=lb_types.Text(\n", - " answer=\"the answer to the text questions are right here\"),\n", - " message_id=\"0\",\n", - ")\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_convo\", # must match your ontology feature\"s name\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - " message_id=\"2\",\n", - ")\n", - "\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_convo\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", 
- " message_id=\"0\",\n", - ")\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " message_id=\"10\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "7.4 Create the label object" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label = []\nlabel.append(\n lb_types.Label(\n data=lb_types.ConversationData(global_key=global_key),\n annotations=[\n ner_annotation,\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_radio_annotation,\n nested_checklist_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label = []\n", - "label.append(\n", - " lb_types.Label(\n", - " data=lb_types.ConversationData(global_key=global_key),\n", - " annotations=[\n", - " ner_annotation,\n", - " text_annotation,\n", - " checklist_annotation,\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " nested_checklist_annotation,\n", - " ],\n", - " ))" - 
] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "7.5 Upload annotations to the project using Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"text_label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_annotation = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"text_label_import_job\" + str(uuid.uuid4()),\n", - " labels=label,\n", - ")\n", - "\n", - "upload_job_annotation.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_annotation.errors)\n", - "print(\"Status of uploads: \", upload_job_annotation.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.6 Send the annotations to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "# get the labels id from the project\n", - "model_run.upsert_labels(project_id=project.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Option deletions for cleanup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - 
"source": [ - "# project.delete()\n", - "# dataset.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + ] +} \ No newline at end of file diff --git a/examples/prediction_upload/geospatial_predictions.ipynb b/examples/prediction_upload/geospatial_predictions.ipynb index bc589cd81..d9035b969 100644 --- a/examples/prediction_upload/geospatial_predictions.ipynb +++ b/examples/prediction_upload/geospatial_predictions.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Geospatial Prediction Import \n", @@ -47,693 +49,174 @@ "\n", "\n", "Please note that this list of unsupported annotations only refers to limitations for importing annotations. 
For example, when using the Labelbox editor, segmentation masks can be created and edited on video assets.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import os\n\nimport uuid\nimport numpy as np\nfrom PIL import Image\nimport cv2\n\nimport labelbox as lb\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import os\n", - "\n", - "import uuid\n", - "import numpy as np\n", - "from PIL import Image\n", - "import cv2\n", - "\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions\n", "- Each cell shows the python annotation and the NDJson annotation for each annotation type." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Point #######\n\n# Python Annotation\npoint_prediction = lb_types.ObjectAnnotation(\n name=\"point_geo\",\n confidence=0.4,\n value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n)\n\n# NDJSON\npoint_prediction_ndjson = {\n \"name\": \"point_geo\",\n \"confidence\": 0.4,\n \"point\": {\n \"x\": -99.20647859573366,\n \"y\": 19.40018029091072\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Point #######\n", - "\n", - "# Python Annotation\n", - "point_prediction = lb_types.ObjectAnnotation(\n", - " name=\"point_geo\",\n", - " confidence=0.4,\n", - " value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n", - ")\n", - "\n", - "# NDJSON\n", - "point_prediction_ndjson = {\n", - " \"name\": \"point_geo\",\n", - " \"confidence\": 0.4,\n", - " \"point\": {\n", - " \"x\": -99.20647859573366,\n", - " \"y\": 19.40018029091072\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Polyline #######\n# Coordinates\ncoords = [\n [-99.20842051506044, 19.40032196622975],\n [-99.20809864997865, 19.39758963475322],\n [-99.20758366584778, 19.39776167179227],\n [-99.20728325843811, 19.3973265189299],\n]\n\nline_points = []\nline_points_ndjson = []\n\nfor sub in coords:\n line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python Annotation\npolyline_prediction = lb_types.ObjectAnnotation(\n name=\"polyline_geo\",\n confidence=0.5,\n value=lb_types.Line(points=line_points),\n)\n\n# NDJSON\npolyline_prediction_ndjson = {\n \"name\": \"polyline_geo\",\n \"confidence\": 0.5,\n \"line\": line_points_ndjson,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Polyline #######\n", - "# Coordinates\n", - "coords = [\n", - " [-99.20842051506044, 
19.40032196622975],\n", - " [-99.20809864997865, 19.39758963475322],\n", - " [-99.20758366584778, 19.39776167179227],\n", - " [-99.20728325843811, 19.3973265189299],\n", - "]\n", - "\n", - "line_points = []\n", - "line_points_ndjson = []\n", - "\n", - "for sub in coords:\n", - " line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", - " line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", - "\n", - "# Python Annotation\n", - "polyline_prediction = lb_types.ObjectAnnotation(\n", - " name=\"polyline_geo\",\n", - " confidence=0.5,\n", - " value=lb_types.Line(points=line_points),\n", - ")\n", - "\n", - "# NDJSON\n", - "polyline_prediction_ndjson = {\n", - " \"name\": \"polyline_geo\",\n", - " \"confidence\": 0.5,\n", - " \"line\": line_points_ndjson,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Polygon #######\n# Coordinates in the desired EPSG coordinate system\ncoords_polygon = [\n [-99.21042680740356, 19.40036244486966],\n [-99.2104160785675, 19.40017017124035],\n [-99.2103409767151, 19.400008256428897],\n [-99.21014785766603, 19.400008256428897],\n [-99.21019077301027, 19.39983622176518],\n [-99.21022295951845, 19.399674306621385],\n [-99.21029806137086, 19.39951239131646],\n [-99.2102873325348, 19.399340356128437],\n [-99.21025514602663, 19.399117722085677],\n [-99.21024441719057, 19.39892544698541],\n [-99.2102336883545, 19.39874329141769],\n [-99.21021223068239, 19.398561135646027],\n [-99.21018004417421, 19.398399219233365],\n [-99.21011567115785, 19.39822718286836],\n [-99.20992255210878, 19.398136104719125],\n [-99.20974016189577, 19.398085505725305],\n [-99.20957922935487, 19.398004547302467],\n [-99.20939683914186, 19.39792358883935],\n [-99.20918226242067, 19.39786286996558],\n [-99.20899987220764, 19.397822390703805],\n [-99.20891404151918, 19.397994427496787],\n [-99.20890331268312, 19.398176583902874],\n [-99.20889258384706, 19.398368859888045],\n 
[-99.20889258384706, 19.398540896103246],\n [-99.20890331268312, 19.39872305189756],\n [-99.20889258384706, 19.39890520748796],\n [-99.20889258384706, 19.39907724313608],\n [-99.20889258384706, 19.399259398329956],\n [-99.20890331268312, 19.399431433603585],\n [-99.20890331268312, 19.39961358840092],\n [-99.20890331268312, 19.399785623300048],\n [-99.20897841453552, 19.399937418648214],\n [-99.20919299125673, 19.399937418648214],\n [-99.2093861103058, 19.39991717927664],\n [-99.20956850051881, 19.39996777770086],\n [-99.20961141586305, 19.40013981222548],\n [-99.20963287353517, 19.40032196622975],\n [-99.20978307724, 19.4004130431554],\n [-99.20996546745302, 19.40039280384301],\n [-99.21019077301027, 19.400372564528084],\n [-99.21042680740356, 19.40036244486966],\n]\n\npolygon_points = []\npolygon_points_ndjson = []\n\nfor sub in coords_polygon:\n polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python Annotation\npolygon_prediction = lb_types.ObjectAnnotation(\n name=\"polygon_geo\",\n confidence=0.5,\n value=lb_types.Polygon(points=polygon_points),\n)\n\n# NDJSON\npolygon_prediction_ndjson = {\n \"name\": \"polygon_geo\",\n \"confidence\": 0.5,\n \"polygon\": polygon_points_ndjson,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Polygon #######\n", - "# Coordinates in the desired EPSG coordinate system\n", - "coords_polygon = [\n", - " [-99.21042680740356, 19.40036244486966],\n", - " [-99.2104160785675, 19.40017017124035],\n", - " [-99.2103409767151, 19.400008256428897],\n", - " [-99.21014785766603, 19.400008256428897],\n", - " [-99.21019077301027, 19.39983622176518],\n", - " [-99.21022295951845, 19.399674306621385],\n", - " [-99.21029806137086, 19.39951239131646],\n", - " [-99.2102873325348, 19.399340356128437],\n", - " [-99.21025514602663, 19.399117722085677],\n", - " [-99.21024441719057, 19.39892544698541],\n", - " [-99.2102336883545, 19.39874329141769],\n", - " 
[-99.21021223068239, 19.398561135646027],\n", - " [-99.21018004417421, 19.398399219233365],\n", - " [-99.21011567115785, 19.39822718286836],\n", - " [-99.20992255210878, 19.398136104719125],\n", - " [-99.20974016189577, 19.398085505725305],\n", - " [-99.20957922935487, 19.398004547302467],\n", - " [-99.20939683914186, 19.39792358883935],\n", - " [-99.20918226242067, 19.39786286996558],\n", - " [-99.20899987220764, 19.397822390703805],\n", - " [-99.20891404151918, 19.397994427496787],\n", - " [-99.20890331268312, 19.398176583902874],\n", - " [-99.20889258384706, 19.398368859888045],\n", - " [-99.20889258384706, 19.398540896103246],\n", - " [-99.20890331268312, 19.39872305189756],\n", - " [-99.20889258384706, 19.39890520748796],\n", - " [-99.20889258384706, 19.39907724313608],\n", - " [-99.20889258384706, 19.399259398329956],\n", - " [-99.20890331268312, 19.399431433603585],\n", - " [-99.20890331268312, 19.39961358840092],\n", - " [-99.20890331268312, 19.399785623300048],\n", - " [-99.20897841453552, 19.399937418648214],\n", - " [-99.20919299125673, 19.399937418648214],\n", - " [-99.2093861103058, 19.39991717927664],\n", - " [-99.20956850051881, 19.39996777770086],\n", - " [-99.20961141586305, 19.40013981222548],\n", - " [-99.20963287353517, 19.40032196622975],\n", - " [-99.20978307724, 19.4004130431554],\n", - " [-99.20996546745302, 19.40039280384301],\n", - " [-99.21019077301027, 19.400372564528084],\n", - " [-99.21042680740356, 19.40036244486966],\n", - "]\n", - "\n", - "polygon_points = []\n", - "polygon_points_ndjson = []\n", - "\n", - "for sub in coords_polygon:\n", - " polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", - " polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", - "\n", - "# Python Annotation\n", - "polygon_prediction = lb_types.ObjectAnnotation(\n", - " name=\"polygon_geo\",\n", - " confidence=0.5,\n", - " value=lb_types.Polygon(points=polygon_points),\n", - ")\n", - "\n", - "# NDJSON\n", - "polygon_prediction_ndjson = 
{\n", - " \"name\": \"polygon_geo\",\n", - " \"confidence\": 0.5,\n", - " \"polygon\": polygon_points_ndjson,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Bounding Box #######\ncoord_object = {\n \"coordinates\": [[\n [-99.20746564865112, 19.39799442829336],\n [-99.20746564865112, 19.39925939999194],\n [-99.20568466186523, 19.39925939999194],\n [-99.20568466186523, 19.39799442829336],\n [-99.20746564865112, 19.39799442829336],\n ]]\n}\n\nbbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\nbbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n\n# Python Annotation\nbbox_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_geo\",\n confidence=0.5,\n value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n)\n\n# NDJSON\nbbox_prediction_ndjson = {\n \"name\": \"bbox_geo\",\n \"confidence\": 0.5,\n \"bbox\": {\n \"top\":\n coord_object[\"coordinates\"][0][1][1],\n \"left\":\n coord_object[\"coordinates\"][0][1][0],\n \"height\":\n coord_object[\"coordinates\"][0][3][1] -\n coord_object[\"coordinates\"][0][1][1],\n \"width\":\n coord_object[\"coordinates\"][0][3][0] -\n coord_object[\"coordinates\"][0][1][0],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Bounding Box #######\n", - "coord_object = {\n", - " \"coordinates\": [[\n", - " [-99.20746564865112, 19.39799442829336],\n", - " [-99.20746564865112, 19.39925939999194],\n", - " [-99.20568466186523, 19.39925939999194],\n", - " [-99.20568466186523, 19.39799442829336],\n", - " [-99.20746564865112, 19.39799442829336],\n", - " ]]\n", - "}\n", - "\n", - "bbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\n", - "bbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n", - "\n", - "# Python Annotation\n", - "bbox_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bbox_geo\",\n", - " confidence=0.5,\n", 
- " value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n", - ")\n", - "\n", - "# NDJSON\n", - "bbox_prediction_ndjson = {\n", - " \"name\": \"bbox_geo\",\n", - " \"confidence\": 0.5,\n", - " \"bbox\": {\n", - " \"top\":\n", - " coord_object[\"coordinates\"][0][1][1],\n", - " \"left\":\n", - " coord_object[\"coordinates\"][0][1][0],\n", - " \"height\":\n", - " coord_object[\"coordinates\"][0][3][1] -\n", - " coord_object[\"coordinates\"][0][1][1],\n", - " \"width\":\n", - " coord_object[\"coordinates\"][0][3][0] -\n", - " coord_object[\"coordinates\"][0][1][0],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Classification - radio (single choice) #######\n\n# Python Annotation\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question_geo\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question_geo\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Classification - radio (single choice) #######\n", - "\n", - "# Python Annotation\n", - "radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question_geo\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\", confidence=0.5)),\n", - ")\n", - "\n", - "# NDJSON\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"radio_question_geo\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Classification - Checklist (multi-choice) #######\n\ncoord_object_checklist = {\n \"coordinates\": [[\n [-99.210266, 
19.39540372195134],\n [-99.210266, 19.396901],\n [-99.20621067903966, 19.396901],\n [-99.20621067903966, 19.39540372195134],\n [-99.210266, 19.39540372195134],\n ]]\n}\n\n# Python Annotation\nbbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_checklist_geo\",\n confidence=0.5,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_name\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5)\n ]),\n )\n ],\n)\n\n# NDJSON\nbbox_with_checklist_subclass_ndjson = {\n \"name\": \"bbox_checklist_geo\",\n \"confidence\": 0.5,\n \"classifications\": [{\n \"name\": \"checklist_class_name\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n }],\n }],\n \"bbox\": {\n \"top\":\n coord_object_checklist[\"coordinates\"][0][1][1],\n \"left\":\n coord_object_checklist[\"coordinates\"][0][1][0],\n \"height\":\n coord_object_checklist[\"coordinates\"][0][3][1] -\n coord_object_checklist[\"coordinates\"][0][1][1],\n \"width\":\n coord_object_checklist[\"coordinates\"][0][3][0] -\n coord_object_checklist[\"coordinates\"][0][1][0],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Classification - Checklist (multi-choice) #######\n", - "\n", - "coord_object_checklist = {\n", - " \"coordinates\": [[\n", - " [-99.210266, 19.39540372195134],\n", - " [-99.210266, 19.396901],\n", - " [-99.20621067903966, 19.396901],\n", - " [-99.20621067903966, 19.39540372195134],\n", - " [-99.210266, 19.39540372195134],\n", - " ]]\n", - "}\n", - "\n", - "# Python Annotation\n", - "bbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n", - " name=\"bbox_checklist_geo\",\n", - " confidence=0.5,\n", - " value=lb_types.Rectangle(\n", - " 
start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n", - " end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"checklist_class_name\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5)\n", - " ]),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "# NDJSON\n", - "bbox_with_checklist_subclass_ndjson = {\n", - " \"name\": \"bbox_checklist_geo\",\n", - " \"confidence\": 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"checklist_class_name\",\n", - " \"answer\": [{\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " }],\n", - " }],\n", - " \"bbox\": {\n", - " \"top\":\n", - " coord_object_checklist[\"coordinates\"][0][1][1],\n", - " \"left\":\n", - " coord_object_checklist[\"coordinates\"][0][1][0],\n", - " \"height\":\n", - " coord_object_checklist[\"coordinates\"][0][3][1] -\n", - " coord_object_checklist[\"coordinates\"][0][1][1],\n", - " \"width\":\n", - " coord_object_checklist[\"coordinates\"][0][3][0] -\n", - " coord_object_checklist[\"coordinates\"][0][1][0],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Classification free form text with bbox #######\n\ncoord_object_text = {\n \"coordinates\": [[\n [-99.21019613742828, 19.397447957052933],\n [-99.21019613742828, 19.39772119262215],\n [-99.20986354351044, 19.39772119262215],\n [-99.20986354351044, 19.397447957052933],\n [-99.21019613742828, 19.397447957052933],\n ]]\n}\n# Python Annotation\nbbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_text_geo\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.21019613742828,\n y=19.397447957052933), # Top left\n end=lb_types.Point(x=-99.20986354351044,\n y=19.39772119262215), # Bottom right\n ),\n 
classifications=[\n lb_types.ClassificationAnnotation(\n name=\"free_text_geo\", value=lb_types.Text(answer=\"sample text\"))\n ],\n)\n\n# NDJSON\nbbox_with_free_text_subclass_ndjson = {\n \"name\": \"bbox_text_geo\",\n \"confidence\": 0.5,\n \"classifications\": [{\n \"name\": \"free_text_geo\",\n \"confidence\": 0.5,\n \"answer\": \"sample text\"\n }],\n \"bbox\": {\n \"top\":\n coord_object_text[\"coordinates\"][0][1][1],\n \"left\":\n coord_object_text[\"coordinates\"][0][1][0],\n \"height\":\n coord_object_text[\"coordinates\"][0][3][1] -\n coord_object_text[\"coordinates\"][0][1][1],\n \"width\":\n coord_object_text[\"coordinates\"][0][3][0] -\n coord_object_text[\"coordinates\"][0][1][0],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Classification free form text with bbox #######\n", - "\n", - "coord_object_text = {\n", - " \"coordinates\": [[\n", - " [-99.21019613742828, 19.397447957052933],\n", - " [-99.21019613742828, 19.39772119262215],\n", - " [-99.20986354351044, 19.39772119262215],\n", - " [-99.20986354351044, 19.397447957052933],\n", - " [-99.21019613742828, 19.397447957052933],\n", - " ]]\n", - "}\n", - "# Python Annotation\n", - "bbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n", - " name=\"bbox_text_geo\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=-99.21019613742828,\n", - " y=19.397447957052933), # Top left\n", - " end=lb_types.Point(x=-99.20986354351044,\n", - " y=19.39772119262215), # Bottom right\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"free_text_geo\", value=lb_types.Text(answer=\"sample text\"))\n", - " ],\n", - ")\n", - "\n", - "# NDJSON\n", - "bbox_with_free_text_subclass_ndjson = {\n", - " \"name\": \"bbox_text_geo\",\n", - " \"confidence\": 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"free_text_geo\",\n", - " \"confidence\": 0.5,\n", - " \"answer\": \"sample text\"\n", - " }],\n", - " \"bbox\": {\n", - " 
\"top\":\n", - " coord_object_text[\"coordinates\"][0][1][1],\n", - " \"left\":\n", - " coord_object_text[\"coordinates\"][0][1][0],\n", - " \"height\":\n", - " coord_object_text[\"coordinates\"][0][3][1] -\n", - " coord_object_text[\"coordinates\"][0][1][1],\n", - " \"width\":\n", - " coord_object_text[\"coordinates\"][0][3][0] -\n", - " coord_object_text[\"coordinates\"][0][1][0],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Classification - Checklist (multi-choice) #######\n\n# Python Annotation\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question_geo\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question_geo\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"third_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Classification - Checklist (multi-choice) #######\n", - "\n", - "# Python Annotation\n", - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question_geo\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n", - " confidence=0.5),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_prediction_ndjson = {\n", - " 
\"name\":\n", - " \"checklist_question_geo\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"third_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classification - Radio and Checklist (with subclassifications) ##########\n\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.2)),\n )\n ],\n )),\n)\n# NDJSON\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.2,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.3\n },\n }],\n },\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n )\n ]),\n )\n ],\n )\n ]),\n)\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n 
\"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5,\n },\n }],\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Classification - Radio and Checklist (with subclassifications) ##########\n", - "\n", - "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5,\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\", confidence=0.2)),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "# NDJSON\n", - "nested_radio_prediction_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.2,\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\",\n", - " \"confidence\": 0.3\n", - " },\n", - " }],\n", - " },\n", - "}\n", - "\n", - "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5,\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=0.5,\n", - " )\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " 
\"first_checklist_answer\",\n",
- "        \"confidence\":\n",
- "            0.5,\n",
- "        \"classifications\": [{\n",
- "            \"name\": \"sub_checklist_question\",\n",
- "            \"answer\": {\n",
- "                \"name\": \"first_sub_checklist_answer\",\n",
- "                \"confidence\": 0.5,\n",
- "            },\n",
- "        }],\n",
- "    }],\n",
- "}"
- ]
+ "execution_count": null
},
{
- "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Import data rows into Catalog"
- ]
+ ],
+ "cell_type": "markdown"
},
{
- "cell_type": "code",
- "execution_count": null,
"metadata": {},
+ "source": "top_left_bound = lb_types.Point(x=-99.21052827588443, y=19.400498983095076)\nbottom_right_bound = lb_types.Point(x=-99.20534818927473, y=19.39533555271248)\n\nepsg = lb_types.EPSG.EPSG4326\nbounds = lb_types.TiledBounds(epsg=epsg,\n                              bounds=[top_left_bound, bottom_right_bound])\nglobal_key = \"mexico_city\" + str(uuid.uuid4())\n\ntile_layer = lb_types.TileLayer(\n    url=\n    \"https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png\"\n)\n\ntiled_image_data = lb_types.TiledImageData(tile_layer=tile_layer,\n                                           tile_bounds=bounds,\n                                           zoom_levels=[17, 23])\n\nasset = {\n    \"row_data\": tiled_image_data.asdict(),\n    \"global_key\": global_key,\n    \"media_type\": \"TMS_GEO\",\n}\n\ndataset = client.create_dataset(name=\"geo_demo_dataset\")\ntask = dataset.create_data_rows([asset])\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)",
+ "cell_type": "code",
"outputs": [],
- "source": [
- "top_left_bound = lb_types.Point(x=-99.21052827588443, y=19.400498983095076)\n",
- "bottom_right_bound = lb_types.Point(x=-99.20534818927473, y=19.39533555271248)\n",
- "\n",
- "epsg = lb_types.EPSG.EPSG4326\n",
- "bounds = lb_types.TiledBounds(epsg=epsg,\n",
- "                              bounds=[top_left_bound, bottom_right_bound])\n",
- "global_key = \"mexico_city\" + uuid.uuid4()\n",
- "\n",
- "tile_layer = lb_types.TileLayer(\n",
- "    url=\n",
- "    \"https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png\"\n",
- 
")\n", - "\n", - "tiled_image_data = lb_types.TiledImageData(tile_layer=tile_layer,\n", - " tile_bounds=bounds,\n", - " zoom_levels=[17, 23])\n", - "\n", - "asset = {\n", - " \"row_data\": tiled_image_data.asdict(),\n", - " \"global_key\": global_key,\n", - " \"media_type\": \"TMS_GEO\",\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"geo_demo_dataset\")\n", - "task = dataset.create_data_rows([asset])\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_geo\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline_geo\"),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo\"),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo_2\"),\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_geo\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_checklist_geo\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class_name\",\n options=[lb.Option(value=\"first_checklist_answer\")],\n ),\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_text_geo\",\n classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text_geo\"),\n ],\n ),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question_geo\",\n 
options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n lb.Option(value=\"third_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question_geo\",\n options=[lb.Option(value=\"first_radio_answer\")],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Ontology Geospatial Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Geospatial_Tile,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(\n", - " tools=[\n", - " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_geo\"),\n", - " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline_geo\"),\n", - " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo\"),\n", - " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon_geo_2\"),\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_geo\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_checklist_geo\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_class_name\",\n", - " options=[lb.Option(value=\"first_checklist_answer\")],\n", - " ),\n", - " ],\n", - " ),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " 
name=\"bbox_text_geo\",\n", - " classifications=[\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text_geo\"),\n", - " ],\n", - " ),\n", - " ],\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question_geo\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " lb.Option(value=\"third_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question_geo\",\n", - " options=[lb.Option(value=\"first_radio_answer\")],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", - " ),\n", - " ],\n", - " ),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Ontology Geospatial Annotations\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Geospatial_Tile,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": 
"code", - "execution_count": null, "metadata": {}, + "source": "# create Model\nmodel = client.create_model(name=\"geospatial_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create Model\n", - "model = client.create_model(name=\"geospatial_model_run_\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid)\n", - "# create Model Run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", @@ -741,420 +224,156 @@ "Create the annotations payload using the snippets in the **Supported Predictions Section**. 
\n", "\n", "The resulting label_ndjson should have exactly the same content for annotations that are supported by both" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "## Lets create another polygon annotation with python annotation tools that draws the image using cv2 and PIL python libraries\n\nhsv = cv2.cvtColor(tiled_image_data.value, cv2.COLOR_RGB2HSV)\nmask = cv2.inRange(hsv, (25, 50, 25), (100, 150, 255))\nkernel = np.ones((15, 20), np.uint8)\nmask = cv2.erode(mask, kernel)\nmask = cv2.dilate(mask, kernel)\nmask_annotation = lb_types.MaskData.from_2D_arr(mask)\nmask_data = lb_types.Mask(mask=mask_annotation, color=[255, 255, 255])\nh, w, _ = tiled_image_data.value.shape\npixel_bounds = lb_types.TiledBounds(\n epsg=lb_types.EPSG.SIMPLEPIXEL,\n bounds=[lb_types.Point(x=0, y=0),\n lb_types.Point(x=w, y=h)],\n)\ntransformer = lb_types.EPSGTransformer.create_pixel_to_geo_transformer(\n src_epsg=pixel_bounds.epsg,\n pixel_bounds=pixel_bounds,\n geo_bounds=tiled_image_data.tile_bounds,\n zoom=23,\n)\npixel_polygons = mask_data.shapely.simplify(3)\nlist_of_polygons = [\n transformer(lb_types.Polygon.from_shapely(p)) for p in pixel_polygons.geoms\n]\npolygon_prediction_two = lb_types.ObjectAnnotation(value=list_of_polygons[0],\n name=\"polygon_geo_2\",\n confidence=0.5)", + "cell_type": "code", "outputs": [], - "source": [ - "## Lets create another polygon annotation with python annotation tools that draws the image using cv2 and PIL python libraries\n", - "\n", - "hsv = cv2.cvtColor(tiled_image_data.value, cv2.COLOR_RGB2HSV)\n", - "mask = cv2.inRange(hsv, (25, 50, 25), (100, 150, 255))\n", - "kernel = np.ones((15, 20), np.uint8)\n", - "mask = cv2.erode(mask, kernel)\n", - "mask = cv2.dilate(mask, kernel)\n", - "mask_annotation = lb_types.MaskData.from_2D_arr(mask)\n", - "mask_data = lb_types.Mask(mask=mask_annotation, color=[255, 255, 255])\n", - "h, w, _ = tiled_image_data.value.shape\n", - 
"pixel_bounds = lb_types.TiledBounds(\n", - " epsg=lb_types.EPSG.SIMPLEPIXEL,\n", - " bounds=[lb_types.Point(x=0, y=0),\n", - " lb_types.Point(x=w, y=h)],\n", - ")\n", - "transformer = lb_types.EPSGTransformer.create_pixel_to_geo_transformer(\n", - " src_epsg=pixel_bounds.epsg,\n", - " pixel_bounds=pixel_bounds,\n", - " geo_bounds=tiled_image_data.tile_bounds,\n", - " zoom=23,\n", - ")\n", - "pixel_polygons = mask_data.shapely.simplify(3)\n", - "list_of_polygons = [\n", - " transformer(lb_types.Polygon.from_shapely(p)) for p in pixel_polygons.geoms\n", - "]\n", - "polygon_prediction_two = lb_types.ObjectAnnotation(value=list_of_polygons[0],\n", - " name=\"polygon_geo_2\",\n", - " confidence=0.5)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "labels = []\nlabels.append(\n lb_types.Label(\n data={\n \"global_key\": global_key,\n \"tile_layer\": tile_layer,\n \"tile_bounds\": bounds,\n \"zoom_levels\": [12, 20],\n },\n annotations=[\n point_prediction,\n polyline_prediction,\n polygon_prediction,\n bbox_prediction,\n radio_prediction,\n bbox_with_checklist_subclass,\n bbox_with_free_text_subclass,\n checklist_prediction,\n polygon_prediction_two,\n nested_checklist_prediction,\n nested_radio_prediction,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "labels = []\n", - "labels.append(\n", - " lb_types.Label(\n", - " data={\n", - " \"global_key\": global_key,\n", - " \"tile_layer\": tile_layer,\n", - " \"tile_bounds\": bounds,\n", - " \"zoom_levels\": [12, 20],\n", - " },\n", - " annotations=[\n", - " point_prediction,\n", - " polyline_prediction,\n", - " polygon_prediction,\n", - " bbox_prediction,\n", - " radio_prediction,\n", - " bbox_with_checklist_subclass,\n", - " bbox_with_free_text_subclass,\n", - " checklist_prediction,\n", - " polygon_prediction_two,\n", - " nested_checklist_prediction,\n", - " nested_radio_prediction,\n", - " ],\n", - " ))" - ] + "execution_count": null }, 
{ - "cell_type": "markdown", "metadata": {}, "source": [ "# If using NDJSON" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\nfor prediction in [\n radio_prediction_ndjson,\n checklist_prediction_ndjson,\n bbox_with_free_text_subclass_ndjson,\n bbox_with_checklist_subclass_ndjson,\n bbox_prediction_ndjson,\n point_prediction_ndjson,\n polyline_prediction_ndjson,\n polygon_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n nested_radio_prediction_ndjson,\n]:\n prediction.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_ndjson.append(prediction)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "for prediction in [\n", - " radio_prediction_ndjson,\n", - " checklist_prediction_ndjson,\n", - " bbox_with_free_text_subclass_ndjson,\n", - " bbox_with_checklist_subclass_ndjson,\n", - " bbox_prediction_ndjson,\n", - " point_prediction_ndjson,\n", - " polyline_prediction_ndjson,\n", - " polygon_prediction_ndjson,\n", - " nested_checklist_prediction_ndjson,\n", - " nested_radio_prediction_ndjson,\n", - "]:\n", - " prediction.update({\n", - " \"dataRow\": {\n", - " \"globalKey\": global_key\n", - " },\n", - " })\n", - " label_ndjson.append(prediction)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. 
Upload the predictions payload to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(name=\"prediction_upload_job\" +\n str(uuid.uuid4()),\n predictions=labels)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(name=\"prediction_upload_job\" +\n", - " str(uuid.uuid4()),\n", - " predictions=labels)\n", - "\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run \n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. 
Create a labelbox project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"geospatial_prediction_demo\",\n media_type=lb.MediaType.Geospatial_Tile)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Labelbox project\n", - "project = client.create_project(name=\"geospatial_prediction_demo\",\n", - " media_type=lb.MediaType.Geospatial_Tile)\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. Create a batch to send to the project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"batch_geospatial_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[global_key], # A list of data rows or data row ids\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"batch_geospatial_prediction_demo\", # Each batch in a project must have a unique name\n", - " global_keys=[global_key], # A list of data rows or data row ids\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Point #######\n\n# Python Annotation\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point_geo\",\n value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n)\n\n####### Polyline #######\nline_points = []\nline_points_ndjson = []\n\nfor sub in coords:\n line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n 
line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python Annotation\npolyline_annotation = lb_types.ObjectAnnotation(\n name=\"polyline_geo\",\n value=lb_types.Line(points=line_points),\n)\n\npolygon_points = []\npolygon_points_ndjson = []\n\nfor sub in coords_polygon:\n polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n\n# Python Annotation\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon_geo\",\n value=lb_types.Polygon(points=polygon_points),\n)\n\nbbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\nbbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n\n# Python Annotation\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_geo\",\n value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n)\n\n# Python Annotation\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question_geo\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\n# Python Annotation\nbbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_checklist_geo\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_name\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n )\n ],\n)\n\nbbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n name=\"bbox_text_geo\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=-99.21019613742828,\n y=19.397447957052933), # Top left\n end=lb_types.Point(x=-99.20986354351044,\n y=19.39772119262215), # Bottom right\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"free_text_geo\", value=lb_types.Text(answer=\"sample text\"))\n 
],\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question_geo\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "####### Point #######\n", - "\n", - "# Python Annotation\n", - "point_annotation = lb_types.ObjectAnnotation(\n", - " name=\"point_geo\",\n", - " value=lb_types.Point(x=-99.20647859573366, y=19.40018029091072),\n", - ")\n", - "\n", - "####### Polyline #######\n", - "line_points = []\n", - "line_points_ndjson = []\n", - "\n", - "for sub in coords:\n", - " line_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", - " line_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", - "\n", - "# Python Annotation\n", - "polyline_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polyline_geo\",\n", - " value=lb_types.Line(points=line_points),\n", - ")\n", - "\n", - "polygon_points = []\n", - "polygon_points_ndjson = []\n", - "\n", - "for sub in 
coords_polygon:\n", - " polygon_points.append(lb_types.Point(x=sub[0], y=sub[1]))\n", - " polygon_points_ndjson.append({\"x\": sub[0], \"y\": sub[1]})\n", - "\n", - "# Python Annotation\n", - "polygon_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polygon_geo\",\n", - " value=lb_types.Polygon(points=polygon_points),\n", - ")\n", - "\n", - "bbox_top_left = lb_types.Point(x=-99.20746564865112, y=19.39799442829336)\n", - "bbox_bottom_right = lb_types.Point(x=-99.20568466186523, y=19.39925939999194)\n", - "\n", - "# Python Annotation\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bbox_geo\",\n", - " value=lb_types.Rectangle(start=bbox_top_left, end=bbox_bottom_right),\n", - ")\n", - "\n", - "# Python Annotation\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question_geo\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - ")\n", - "\n", - "# Python Annotation\n", - "bbox_with_checklist_subclass = lb_types.ObjectAnnotation(\n", - " name=\"bbox_checklist_geo\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=-99.210266, y=19.39540372195134), # Top left\n", - " end=lb_types.Point(x=-99.20621067903966, y=19.396901), # Bottom right\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"checklist_class_name\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "bbox_with_free_text_subclass = lb_types.ObjectAnnotation(\n", - " name=\"bbox_text_geo\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=-99.21019613742828,\n", - " y=19.397447957052933), # Top left\n", - " end=lb_types.Point(x=-99.20986354351044,\n", - " y=19.39772119262215), # Bottom right\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"free_text_geo\", 
value=lb_types.Text(answer=\"sample text\"))\n", - " ],\n", - ")\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question_geo\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "labels = []\nlabels.append(\n lb_types.Label(\n data=lb_types.TiledImageData(\n global_key=global_key,\n tile_layer=tile_layer,\n tile_bounds=bounds,\n zoom_levels=[12, 20],\n ),\n annotations=[\n point_annotation,\n polyline_annotation,\n polygon_annotation,\n bbox_annotation,\n radio_annotation,\n bbox_with_checklist_subclass,\n bbox_with_free_text_subclass,\n checklist_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "labels = []\n", - "labels.append(\n", - " lb_types.Label(\n", - " data=lb_types.TiledImageData(\n", - " global_key=global_key,\n", - " tile_layer=tile_layer,\n", - " tile_bounds=bounds,\n", - " zoom_levels=[12, 20],\n", - " ),\n", - " annotations=[\n", - " point_annotation,\n", - " polyline_annotation,\n", - " polygon_annotation,\n", - " bbox_annotation,\n", - " radio_annotation,\n", - " bbox_with_checklist_subclass,\n", - " bbox_with_free_text_subclass,\n", - " checklist_annotation,\n", - " nested_checklist_annotation,\n", - " nested_radio_annotation,\n", - " ],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. 
Upload annotations to the project using Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"geospatial_annotations_import_\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_annotation = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"geospatial_annotations_import_\" + str(uuid.uuid4()),\n", - " labels=labels,\n", - ")\n", - "\n", - "upload_job_annotation.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_annotation.errors)\n", - "print(\"Status of uploads: \", upload_job_annotation.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6. 
Send the annotations to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "# get the labels id from the project\n", - "model_run.upsert_labels(project_id=project.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# upload_job\n# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# upload_job\n", - "# project.delete()\n", - "# dataset.delete()" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/examples/prediction_upload/html_predictions.ipynb b/examples/prediction_upload/html_predictions.ipynb index 0caa5fdd1..f78f256ea 100644 --- a/examples/prediction_upload/html_predictions.ipynb +++ b/examples/prediction_upload/html_predictions.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# HTML Prediction Import\n", @@ -46,363 +48,138 @@ "- NER\n", "\n", "A Model Run is a container for the predictions, annotations and metrics of a specific experiment in your ML model development cycle." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid\nimport numpy as np", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "import uuid\n", - "import numpy as np" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########### Radio Classification ###########\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\", # Should match the name in the ontology\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n)\n\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########### Radio Classification ###########\n", - "radio_prediction = 
lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\", # Should match the name in the ontology\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\", confidence=0.5)),\n", - ")\n", - "\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\"\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "#### Nested Classifications ######\n\n# Python annotation\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n 
\"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5,\n },\n }],\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "#### Nested Classifications ######\n", - "\n", - "# Python annotation\n", - "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\", confidence=0.5)),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_prediction_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " }],\n", - " },\n", - "}\n", - "\n", - "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " 
lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=0.5,\n", - " )\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " \"confidence\": 0.5,\n", - " },\n", - " }],\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Checklist ##########\n\n# Python annotation\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\": \"checklist_question\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Checklist ##########\n", - "\n", - "# Python annotation\n", - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n", - " confidence=0.5),\n", - " ]),\n", - ")\n", - "\n", - "# 
NDJSON\n", - "checklist_prediction_ndjson = {\n", - " \"name\": \"checklist_question\",\n", - " \"answer\": [{\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classification Free-Form text ##########\n## Text classifications do not support confidence values\n# Python annotation\ntext_prediction = lb_types.ClassificationAnnotation(name=\"free_text\",\n value=lb_types.Text(\n answer=\"sample text\",\n confidence=0.5))\n\n# NDJSON\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Classification Free-Form text ##########\n", - "## Text classifications do not support confidence values\n", - "# Python annotation\n", - "text_prediction = lb_types.ClassificationAnnotation(name=\"free_text\",\n", - " value=lb_types.Text(\n", - " answer=\"sample text\",\n", - " confidence=0.5))\n", - "\n", - "# NDJSON\n", - "text_prediction_ndjson = {\n", - " \"name\": \"free_text\",\n", - " \"answer\": \"sample text\",\n", - " \"confidence\": 0.5,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# send a sample image as batch to the project\nglobal_key = \"sample_html_2.html\" + str(uuid.uuid4())\n\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_2.html\",\n \"global_key\":\n global_key,\n}\ndataset = client.create_dataset(\n name=\"html prediction demo dataset\",\n iam_integration=\n None, # Removing this argument will default to the organziation's default iam integration\n)\ntask = 
dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "# send a sample image as batch to the project\n", - "global_key = \"sample_html_2.html\" + str(uuid.uuid4())\n", - "\n", - "test_img_url = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_2.html\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "dataset = client.create_dataset(\n", - " name=\"html prediction demo dataset\",\n", - " iam_integration=\n", - " None, # Removing this argument will default to the organziation's default iam integration\n", - ")\n", - "task = dataset.create_data_rows([test_img_url])\n", - "task.wait_till_done()\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names should match the name field in your annotations to ensure the correct feature schemas are matched.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\n \"radio_question\", # name matching the tool used in the annotation\n options=[lb.Option(value=\"first_radio_answer\")],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n 
class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n lb.Option(value=\"third_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ])\n\nontology = client.create_ontology(\n \"Ontology HTML Predictions\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Html,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "## Setup the ontology and link the tools created above.\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\n", - " \"radio_question\", # name matching the tool used in the annotation\n", - " options=[lb.Option(value=\"first_radio_answer\")],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", - " ),\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " 
name=\"checklist_question\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " lb.Option(value=\"third_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\"),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ])\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Ontology HTML Predictions\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Html,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# create Model\nmodel = client.create_model(name=\"HTML_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create Model\n", - "model = client.create_model(name=\"HTML_model_run_\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid)\n", - "# create Model Run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": 
"code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", @@ -412,290 +189,149 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting label_ndjson should have exactly the same content for annotations that are supported by both" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label for predictions\nlabel_prediction = []\nlabel_prediction.append(\n lb_types.Label(\n data=lb_types.HTMLData(global_key=global_key),\n annotations=[\n radio_prediction,\n checklist_prediction,\n text_prediction,\n nested_checklist_prediction,\n nested_radio_prediction,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label for predictions\n", - "label_prediction = []\n", - "label_prediction.append(\n", - " lb_types.Label(\n", - " data=lb_types.HTMLData(global_key=global_key),\n", - " annotations=[\n", - " radio_prediction,\n", - " checklist_prediction,\n", - " text_prediction,\n", - " nested_checklist_prediction,\n", - " nested_radio_prediction,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "If using NDJSON: " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_prediction_ndjson = []\nfor annot in [\n radio_prediction_ndjson,\n nested_radio_prediction_ndjson,\n checklist_prediction_ndjson,\n text_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n]:\n annot.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_prediction_ndjson.append(annot)", + "cell_type": "code", "outputs": 
[], - "source": [ - "label_prediction_ndjson = []\n", - "for annot in [\n", - " radio_prediction_ndjson,\n", - " nested_radio_prediction_ndjson,\n", - " checklist_prediction_ndjson,\n", - " text_prediction_ndjson,\n", - " nested_checklist_prediction_ndjson,\n", - "]:\n", - " annot.update({\n", - " \"dataRow\": {\n", - " \"globalKey\": global_key\n", - " },\n", - " })\n", - " label_prediction_ndjson.append(annot)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Step 6. Upload the predictions payload to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_prediction,\n", - ")\n", - "\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run \n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. 
Create a labelbox project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"HTML prediction import demo\",\n media_type=lb.MediaType.Html)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Labelbox project\n", - "project = client.create_project(name=\"HTML prediction import demo\",\n", - " media_type=lb.MediaType.Html)\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. Create a batch to send to the project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"batch_prediction_html\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"batch_prediction_html\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "###### Annotations ######\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n 
name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",)\n ]),\n )\n ],\n )\n ]),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\",),\n ]),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))", + "cell_type": "code", "outputs": [], - "source": [ - "###### Annotations ######\n", - "\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - ")\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " 
)),\n", - ")\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",)\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",),\n", - " lb_types.ClassificationAnswer(name=\"third_checklist_answer\",),\n", - " ]),\n", - ")\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n text_annotation,\n checklist_annotation,\n radio_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label = []\n", - "label.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " text_annotation,\n", - " checklist_annotation,\n", - " radio_annotation,\n", - " nested_checklist_annotation,\n", - " nested_radio_annotation,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "##### 7.5. Upload annotations to the project using Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"html_annotation_import\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_annotation = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"html_annotation_import\" + str(uuid.uuid4()),\n", - " labels=label,\n", - ")\n", - "\n", - "upload_job_annotation.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_annotation.errors)\n", - "print(\"Status of uploads: \", upload_job_annotation.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, 
"source": [ "##### 7.6 Send the annotations to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "# get the labels id from the project\n", - "model_run.upsert_labels(project_id=project.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/examples/prediction_upload/image_predictions.ipynb b/examples/prediction_upload/image_predictions.ipynb index 372881bb8..69add64e3 100644 --- a/examples/prediction_upload/image_predictions.ipynb +++ b/examples/prediction_upload/image_predictions.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Image Prediction Import\n", @@ -46,869 +48,273 @@ "- Classification - checklist\n", "\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* Notes:\n", " * If you are importing more than 1,000 mask predictions at a time, consider submitting separate jobs, as they can take longer than 
other prediction types to import.\n", " * After the execution of this notebook a complete Model Run with predictions will be created in your organization. " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import uuid\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "import requests\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Radio (single-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\", confidence=0.5)),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": 
\"radio_question\",\n \"answer\": {\n \"name\": \"second_radio_answer\",\n \"confidence\": 0.5\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\", confidence=0.5)),\n", - ")\n", - "\n", - "# NDJSON\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"second_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Nested radio and checklist" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "nested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n 
lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5,\n },\n }],\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5,\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\", confidence=0.5)),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_prediction_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " }],\n", - " },\n", - "}\n", - "\n", - "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5,\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=0.5,\n", - " )\n", - " ]),\n", - " 
)\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " \"confidence\": 0.5,\n", - " },\n", - " }],\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Checklist (multi-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Annotations\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Annotations\n", - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"checklist_question\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " 
{\n", - " \"name\": \"second_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding Box" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Annotation\nbbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n confidence=0.5,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\n# NDJSON\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"confidence\": 0.5,\n \"bbox\": {\n \"top\": 977,\n \"left\": 1690,\n \"height\": 330,\n \"width\": 225\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Annotation\n", - "bbox_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " confidence=0.5,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", - " end=lb_types.Point(x=1915,\n", - " y=1307), # x= left + width , y = top + height\n", - " ),\n", - ")\n", - "\n", - "# NDJSON\n", - "bbox_prediction_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"confidence\": 0.5,\n", - " \"bbox\": {\n", - " \"top\": 977,\n", - " \"left\": 1690,\n", - " \"height\": 330,\n", - " \"width\": 225\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box with nested classification " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Bounding box with nested classification #######\nbbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, 
y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n)\n\n## NDJSON\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"confidence\": 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n \"bbox\": {\n \"top\": 933,\n \"left\": 541,\n \"height\": 191,\n \"width\": 330\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Bounding box with nested classification #######\n", - "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " confidence=0.5,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", - " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\", confidence=0.5)),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "## NDJSON\n", - "bbox_with_radio_subclass_prediction_ndjson = {\n", - " \"name\": \"bbox_with_radio_subclass\",\n", - " \"confidence\": 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " }],\n", - " \"bbox\": {\n", - " \"top\": 933,\n", - " \"left\": 541,\n", - " \"height\": 191,\n", - " \"width\": 330\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Polygon" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - 
"execution_count": null, "metadata": {}, + "source": "# Python Anotation\npolygon_prediction = lb_types.ObjectAnnotation(\n name=\"polygon\",\n confidence=0.5,\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\n# NDJSON\n\npolygon_prediction_ndjson = {\n \"name\":\n \"polygon\",\n \"confidence\":\n 0.5,\n \"polygon\": [\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n {\n \"x\": 2278.306,\n \"y\": 256.885\n },\n {\n \"x\": 2428.197,\n \"y\": 200.437\n },\n {\n \"x\": 2560.0,\n \"y\": 335.419\n },\n {\n \"x\": 2557.386,\n \"y\": 503.165\n },\n {\n \"x\": 2320.596,\n \"y\": 503.103\n },\n {\n \"x\": 2156.083,\n \"y\": 628.943\n },\n {\n \"x\": 2161.111,\n \"y\": 785.519\n },\n {\n \"x\": 2002.115,\n \"y\": 894.647\n },\n {\n \"x\": 1838.456,\n \"y\": 877.874\n },\n {\n \"x\": 1436.53,\n \"y\": 874.636\n },\n {\n \"x\": 1411.403,\n \"y\": 758.579\n },\n {\n \"x\": 1353.853,\n \"y\": 751.74\n },\n {\n \"x\": 1345.264,\n \"y\": 453.461\n },\n {\n \"x\": 1426.011,\n \"y\": 421.129\n },\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Anotation\n", - "polygon_prediction = lb_types.ObjectAnnotation(\n", - " name=\"polygon\",\n", - " confidence=0.5,\n", - " value=lb_types.Polygon(points=[\n", - " lb_types.Point(x=1489.581, y=183.934),\n", - " lb_types.Point(x=2278.306, y=256.885),\n", - " 
lb_types.Point(x=2428.197, y=200.437),\n", - " lb_types.Point(x=2560.0, y=335.419),\n", - " lb_types.Point(x=2557.386, y=503.165),\n", - " lb_types.Point(x=2320.596, y=503.103),\n", - " lb_types.Point(x=2156.083, y=628.943),\n", - " lb_types.Point(x=2161.111, y=785.519),\n", - " lb_types.Point(x=2002.115, y=894.647),\n", - " lb_types.Point(x=1838.456, y=877.874),\n", - " lb_types.Point(x=1436.53, y=874.636),\n", - " lb_types.Point(x=1411.403, y=758.579),\n", - " lb_types.Point(x=1353.853, y=751.74),\n", - " lb_types.Point(x=1345.264, y=453.461),\n", - " lb_types.Point(x=1426.011, y=421.129),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "\n", - "polygon_prediction_ndjson = {\n", - " \"name\":\n", - " \"polygon\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"polygon\": [\n", - " {\n", - " \"x\": 1489.581,\n", - " \"y\": 183.934\n", - " },\n", - " {\n", - " \"x\": 2278.306,\n", - " \"y\": 256.885\n", - " },\n", - " {\n", - " \"x\": 2428.197,\n", - " \"y\": 200.437\n", - " },\n", - " {\n", - " \"x\": 2560.0,\n", - " \"y\": 335.419\n", - " },\n", - " {\n", - " \"x\": 2557.386,\n", - " \"y\": 503.165\n", - " },\n", - " {\n", - " \"x\": 2320.596,\n", - " \"y\": 503.103\n", - " },\n", - " {\n", - " \"x\": 2156.083,\n", - " \"y\": 628.943\n", - " },\n", - " {\n", - " \"x\": 2161.111,\n", - " \"y\": 785.519\n", - " },\n", - " {\n", - " \"x\": 2002.115,\n", - " \"y\": 894.647\n", - " },\n", - " {\n", - " \"x\": 1838.456,\n", - " \"y\": 877.874\n", - " },\n", - " {\n", - " \"x\": 1436.53,\n", - " \"y\": 874.636\n", - " },\n", - " {\n", - " \"x\": 1411.403,\n", - " \"y\": 758.579\n", - " },\n", - " {\n", - " \"x\": 1353.853,\n", - " \"y\": 751.74\n", - " },\n", - " {\n", - " \"x\": 1345.264,\n", - " \"y\": 453.461\n", - " },\n", - " {\n", - " \"x\": 1426.011,\n", - " \"y\": 421.129\n", - " },\n", - " {\n", - " \"x\": 1489.581,\n", - " \"y\": 183.934\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ 
"### Classification: Free-form text" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\ntext_annotation = lb_types.ClassificationAnnotation(name=\"free_text\",\n value=lb_types.Text(\n answer=\"sample text\",\n confidence=0.5))\n\n# NDJSON\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "text_annotation = lb_types.ClassificationAnnotation(name=\"free_text\",\n", - " value=lb_types.Text(\n", - " answer=\"sample text\",\n", - " confidence=0.5))\n", - "\n", - "# NDJSON\n", - "text_annotation_ndjson = {\n", - " \"name\": \"free_text\",\n", - " \"answer\": \"sample text\",\n", - " \"confidence\": 0.5,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Segmentation mask" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "### Raster Segmentation (Byte string array)\nurl = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/raster_seg.png\"\nresponse = requests.get(url)\n\nmask_data = lb.types.MaskData(\n im_bytes=response.content\n) # You can also use \"url\" instead of img_bytes to pass the PNG mask url.\nmask_prediction = lb_types.ObjectAnnotation(name=\"mask\",\n value=lb_types.Mask(mask=mask_data,\n color=(255, 255,\n 255)))\n\n# NDJSON using instanceURI, bytes array is not fully supported.\nmask_prediction_ndjson = {\n \"name\": \"mask\",\n \"classifications\": [],\n \"mask\": {\n \"instanceURI\": url,\n \"colorRGB\": (255, 255, 255)\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "### Raster Segmentation (Byte string array)\n", - "url = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/raster_seg.png\"\n", - "response = requests.get(url)\n", - "\n", - "mask_data = 
lb.types.MaskData(\n", - " im_bytes=response.content\n", - ") # You can also use \"url\" instead of img_bytes to pass the PNG mask url.\n", - "mask_prediction = lb_types.ObjectAnnotation(name=\"mask\",\n", - " value=lb_types.Mask(mask=mask_data,\n", - " color=(255, 255,\n", - " 255)))\n", - "\n", - "# NDJSON using instanceURI, bytes array is not fully supported.\n", - "mask_prediction_ndjson = {\n", - " \"name\": \"mask\",\n", - " \"classifications\": [],\n", - " \"mask\": {\n", - " \"instanceURI\": url,\n", - " \"colorRGB\": (255, 255, 255)\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Segmentation mask with nested classification" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "url_2 = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/raster_seg_with_subclass.png\"\nresponse_2 = requests.get(url_2)\nmask_data_2 = lb_types.MaskData(im_bytes=response_2.content)\n\n# Python annotation\nmask_with_text_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"mask_with_text_subclass\", # must match your ontology feature\"s name\n value=lb_types.Mask(mask=mask_data_2, color=(255, 255, 255)),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_free_text\",\n value=lb_types.Text(answer=\"free text answer\"))\n ],\n)\n\n# NDJSON using instanceURI, bytes array is not fully supported.\nmask_with_text_subclass_prediction_ndjson = {\n \"name\":\n \"mask_with_text_subclass\",\n \"mask\": {\n \"instanceURI\": url_2,\n \"colorRGB\": (255, 255, 255)\n },\n \"classifications\": [{\n \"name\": \"sub_free_text\",\n \"answer\": \"free text answer\"\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "url_2 = \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/raster_seg_with_subclass.png\"\n", - "response_2 = requests.get(url_2)\n", - "mask_data_2 = 
lb_types.MaskData(im_bytes=response_2.content)\n", - "\n", - "# Python annotation\n", - "mask_with_text_subclass_prediction = lb_types.ObjectAnnotation(\n", - " name=\"mask_with_text_subclass\", # must match your ontology feature\"s name\n", - " value=lb_types.Mask(mask=mask_data_2, color=(255, 255, 255)),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_free_text\",\n", - " value=lb_types.Text(answer=\"free text answer\"))\n", - " ],\n", - ")\n", - "\n", - "# NDJSON using instanceURI, bytes array is not fully supported.\n", - "mask_with_text_subclass_prediction_ndjson = {\n", - " \"name\":\n", - " \"mask_with_text_subclass\",\n", - " \"mask\": {\n", - " \"instanceURI\": url_2,\n", - " \"colorRGB\": (255, 255, 255)\n", - " },\n", - " \"classifications\": [{\n", - " \"name\": \"sub_free_text\",\n", - " \"answer\": \"free text answer\"\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Point" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Annotation\npoint_prediction = lb_types.ObjectAnnotation(\n name=\"point\",\n confidence=0.5,\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\n# NDJSON\npoint_prediction_ndjson = {\n \"name\": \"point\",\n \"confidence\": 0.5,\n \"classifications\": [],\n \"point\": {\n \"x\": 1166.606,\n \"y\": 1441.768\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Annotation\n", - "point_prediction = lb_types.ObjectAnnotation(\n", - " name=\"point\",\n", - " confidence=0.5,\n", - " value=lb_types.Point(x=1166.606, y=1441.768),\n", - ")\n", - "\n", - "# NDJSON\n", - "point_prediction_ndjson = {\n", - " \"name\": \"point\",\n", - " \"confidence\": 0.5,\n", - " \"classifications\": [],\n", - " \"point\": {\n", - " \"x\": 1166.606,\n", - " \"y\": 1441.768\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", 
"metadata": {}, "source": [ "### Polyline" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Annotation\n\npolyline_prediction = lb_types.ObjectAnnotation(\n name=\"polyline\",\n confidence=0.5,\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)\n\n# NDJSON\npolyline_prediction_ndjson = {\n \"name\":\n \"polyline\",\n \"confidence\":\n 0.5,\n \"classifications\": [],\n \"line\": [\n {\n \"x\": 2534.353,\n \"y\": 249.471\n },\n {\n \"x\": 2429.492,\n \"y\": 182.092\n },\n {\n \"x\": 2294.322,\n \"y\": 221.962\n },\n {\n \"x\": 2224.491,\n \"y\": 180.463\n },\n {\n \"x\": 2136.123,\n \"y\": 204.716\n },\n {\n \"x\": 1712.247,\n \"y\": 173.949\n },\n {\n \"x\": 1703.838,\n \"y\": 84.438\n },\n {\n \"x\": 1579.772,\n \"y\": 82.61\n },\n {\n \"x\": 1583.442,\n \"y\": 167.552\n },\n {\n \"x\": 1478.869,\n \"y\": 164.903\n },\n {\n \"x\": 1418.941,\n \"y\": 318.149\n },\n {\n \"x\": 1243.128,\n \"y\": 400.815\n },\n {\n \"x\": 1022.067,\n \"y\": 319.007\n },\n {\n \"x\": 892.367,\n \"y\": 379.216\n },\n {\n \"x\": 670.273,\n \"y\": 364.408\n },\n {\n \"x\": 613.114,\n \"y\": 
288.16\n },\n {\n \"x\": 377.559,\n \"y\": 238.251\n },\n {\n \"x\": 368.087,\n \"y\": 185.064\n },\n {\n \"x\": 246.557,\n \"y\": 167.286\n },\n {\n \"x\": 236.648,\n \"y\": 285.61\n },\n {\n \"x\": 90.929,\n \"y\": 326.412\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Annotation\n", - "\n", - "polyline_prediction = lb_types.ObjectAnnotation(\n", - " name=\"polyline\",\n", - " confidence=0.5,\n", - " value=lb_types.Line(points=[\n", - " lb_types.Point(x=2534.353, y=249.471),\n", - " lb_types.Point(x=2429.492, y=182.092),\n", - " lb_types.Point(x=2294.322, y=221.962),\n", - " lb_types.Point(x=2224.491, y=180.463),\n", - " lb_types.Point(x=2136.123, y=204.716),\n", - " lb_types.Point(x=1712.247, y=173.949),\n", - " lb_types.Point(x=1703.838, y=84.438),\n", - " lb_types.Point(x=1579.772, y=82.61),\n", - " lb_types.Point(x=1583.442, y=167.552),\n", - " lb_types.Point(x=1478.869, y=164.903),\n", - " lb_types.Point(x=1418.941, y=318.149),\n", - " lb_types.Point(x=1243.128, y=400.815),\n", - " lb_types.Point(x=1022.067, y=319.007),\n", - " lb_types.Point(x=892.367, y=379.216),\n", - " lb_types.Point(x=670.273, y=364.408),\n", - " lb_types.Point(x=613.114, y=288.16),\n", - " lb_types.Point(x=377.559, y=238.251),\n", - " lb_types.Point(x=368.087, y=185.064),\n", - " lb_types.Point(x=246.557, y=167.286),\n", - " lb_types.Point(x=236.648, y=285.61),\n", - " lb_types.Point(x=90.929, y=326.412),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "polyline_prediction_ndjson = {\n", - " \"name\":\n", - " \"polyline\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [],\n", - " \"line\": [\n", - " {\n", - " \"x\": 2534.353,\n", - " \"y\": 249.471\n", - " },\n", - " {\n", - " \"x\": 2429.492,\n", - " \"y\": 182.092\n", - " },\n", - " {\n", - " \"x\": 2294.322,\n", - " \"y\": 221.962\n", - " },\n", - " {\n", - " \"x\": 2224.491,\n", - " \"y\": 180.463\n", - " },\n", - " {\n", - " \"x\": 2136.123,\n", - " \"y\": 204.716\n", - " 
},\n", - " {\n", - " \"x\": 1712.247,\n", - " \"y\": 173.949\n", - " },\n", - " {\n", - " \"x\": 1703.838,\n", - " \"y\": 84.438\n", - " },\n", - " {\n", - " \"x\": 1579.772,\n", - " \"y\": 82.61\n", - " },\n", - " {\n", - " \"x\": 1583.442,\n", - " \"y\": 167.552\n", - " },\n", - " {\n", - " \"x\": 1478.869,\n", - " \"y\": 164.903\n", - " },\n", - " {\n", - " \"x\": 1418.941,\n", - " \"y\": 318.149\n", - " },\n", - " {\n", - " \"x\": 1243.128,\n", - " \"y\": 400.815\n", - " },\n", - " {\n", - " \"x\": 1022.067,\n", - " \"y\": 319.007\n", - " },\n", - " {\n", - " \"x\": 892.367,\n", - " \"y\": 379.216\n", - " },\n", - " {\n", - " \"x\": 670.273,\n", - " \"y\": 364.408\n", - " },\n", - " {\n", - " \"x\": 613.114,\n", - " \"y\": 288.16\n", - " },\n", - " {\n", - " \"x\": 377.559,\n", - " \"y\": 238.251\n", - " },\n", - " {\n", - " \"x\": 368.087,\n", - " \"y\": 185.064\n", - " },\n", - " {\n", - " \"x\": 246.557,\n", - " \"y\": 167.286\n", - " },\n", - " {\n", - " \"x\": 236.648,\n", - " \"y\": 285.61\n", - " },\n", - " {\n", - " \"x\": 90.929,\n", - " \"y\": 326.412\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# send a sample image as batch to the project\nglobal_key = \"2560px-Kitano_Street_Kobe01s.jpeg\" + str(uuid.uuid4())\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\ndataset = client.create_dataset(name=\"image_prediction_demo\")\ntask = dataset.create_data_rows([test_img_url])\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "# send a sample image as batch to the project\n", - "global_key = 
\"2560px-Kitano_Street_Kobe01s.jpeg\" + str(uuid.uuid4())\n", - "test_img_url = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "dataset = client.create_dataset(name=\"image_prediction_demo\")\n", - "task = dataset.create_data_rows([test_img_url])\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n 
lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of tools\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"mask\"),\n lb.Tool(\n tool=lb.Tool.Type.RASTER_SEGMENTATION,\n name=\"mask_with_text_subclass\",\n classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"sub_free_text\")\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Image Prediction Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " 
lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\"),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " tools=[ # List of tools\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", - " ),\n", - " ],\n", - " ),\n", - " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n", - " lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"mask\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.RASTER_SEGMENTATION,\n", - " name=\"mask_with_text_subclass\",\n", - " classifications=[\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"sub_free_text\")\n", - " ],\n", - " ),\n", - " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n", - " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Image Prediction 
Import Demo\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Image,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# create Model\nmodel = client.create_model(name=\"image_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create Model\n", - "model = client.create_model(name=\"image_model_run_\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid)\n", - "# create Model Run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", @@ -916,398 +322,150 @@ "Create the prediction payload using the snippets of code in ***Supported Predictions*** section. 
\n", "\n", "The resulting label_ndjson should have exactly the same content for predictions that are supported by both" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label for predictions\nlabel_prediction = []\nlabel_prediction.append(\n lb_types.Label(\n data=lb_types.ImageData(global_key=global_key),\n annotations=[\n radio_prediction,\n nested_radio_prediction,\n checklist_prediction,\n nested_checklist_prediction,\n bbox_prediction,\n bbox_with_radio_subclass_prediction,\n polyline_prediction,\n polygon_prediction,\n mask_prediction,\n mask_with_text_subclass_prediction,\n point_prediction,\n text_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label for predictions\n", - "label_prediction = []\n", - "label_prediction.append(\n", - " lb_types.Label(\n", - " data=lb_types.ImageData(global_key=global_key),\n", - " annotations=[\n", - " radio_prediction,\n", - " nested_radio_prediction,\n", - " checklist_prediction,\n", - " nested_checklist_prediction,\n", - " bbox_prediction,\n", - " bbox_with_radio_subclass_prediction,\n", - " polyline_prediction,\n", - " polygon_prediction,\n", - " mask_prediction,\n", - " mask_with_text_subclass_prediction,\n", - " point_prediction,\n", - " text_annotation,\n", - " ],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON:" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_prediction_ndjson = []\n\nfor annot in [\n radio_prediction_ndjson,\n checklist_prediction_ndjson,\n bbox_prediction_ndjson,\n bbox_with_radio_subclass_prediction_ndjson,\n polygon_prediction_ndjson,\n mask_prediction_ndjson,\n mask_with_text_subclass_prediction_ndjson,\n point_prediction_ndjson,\n polyline_prediction_ndjson,\n text_annotation_ndjson,\n nested_radio_prediction_ndjson,\n 
nested_checklist_prediction_ndjson,\n]:\n annot.update({\"dataRow\": {\"globalKey\": global_key}})\n label_prediction_ndjson.append(annot)", + "cell_type": "code", "outputs": [], - "source": [ - "label_prediction_ndjson = []\n", - "\n", - "for annot in [\n", - " radio_prediction_ndjson,\n", - " checklist_prediction_ndjson,\n", - " bbox_prediction_ndjson,\n", - " bbox_with_radio_subclass_prediction_ndjson,\n", - " polygon_prediction_ndjson,\n", - " mask_prediction_ndjson,\n", - " mask_with_text_subclass_prediction_ndjson,\n", - " point_prediction_ndjson,\n", - " polyline_prediction_ndjson,\n", - " text_annotation_ndjson,\n", - " nested_radio_prediction_ndjson,\n", - " nested_checklist_prediction_ndjson,\n", - "]:\n", - " annot.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_prediction_ndjson.append(annot)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. Upload the predictions payload to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for prediction uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_prediction,\n", - ")\n", - "\n", - "# Errors will appear for prediction uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", 
"metadata": {}, "source": [ "## Step 7: Send annotations to a model run\n", "To visualize both annotations and predictions in the model run we will create a project with ground truth annotations. \n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. Create a labelbox project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"Image Prediction Demo\",\n media_type=lb.MediaType.Image)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Labelbox project\n", - "project = client.create_project(name=\"Image Prediction Demo\",\n", - " media_type=lb.MediaType.Image)\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. 
Create a batch to send to the project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"batch_predictions_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"batch_predictions_demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########### Annotations ###########\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n 
value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n)\n\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon\",\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\nmask_annotation 
= lb_types.ObjectAnnotation(name=\"mask\",\n value=lb_types.Mask(mask=mask_data,\n color=(255, 255,\n 255)))\n\nmask_with_text_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"mask_with_text_subclass\", # must match your ontology feature\"s name\n value=lb_types.Mask(mask=mask_data_2, color=(255, 255, 255)),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_free_text\",\n value=lb_types.Text(answer=\"free text answer\"))\n ],\n)\n\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point\",\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\npolyline_annotation = lb_types.ObjectAnnotation(\n name=\"polyline\",\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "########### Annotations ###########\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\")),\n", - ")\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", - " end=lb_types.Point(x=1915,\n", - " y=1307), # x= left + width , y = top + height\n", - " ),\n", - ")\n", - "\n", - "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", - " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\", confidence=0.5)),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "polygon_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polygon\",\n", - " value=lb_types.Polygon(points=[\n", - " lb_types.Point(x=1489.581, y=183.934),\n", - " lb_types.Point(x=2278.306, y=256.885),\n", - " lb_types.Point(x=2428.197, y=200.437),\n", - " lb_types.Point(x=2560.0, y=335.419),\n", - " lb_types.Point(x=2557.386, y=503.165),\n", - " lb_types.Point(x=2320.596, y=503.103),\n", - " lb_types.Point(x=2156.083, y=628.943),\n", - " lb_types.Point(x=2161.111, y=785.519),\n", - " lb_types.Point(x=2002.115, y=894.647),\n", - " lb_types.Point(x=1838.456, y=877.874),\n", - " lb_types.Point(x=1436.53, y=874.636),\n", - " lb_types.Point(x=1411.403, y=758.579),\n", - " lb_types.Point(x=1353.853, y=751.74),\n", - " lb_types.Point(x=1345.264, y=453.461),\n", - " lb_types.Point(x=1426.011, y=421.129),\n", - " ]),\n", - ")\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", - "\n", - "mask_annotation = lb_types.ObjectAnnotation(name=\"mask\",\n", - " value=lb_types.Mask(mask=mask_data,\n", - " color=(255, 255,\n", - " 255)))\n", - "\n", - "mask_with_text_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"mask_with_text_subclass\", # must match your ontology feature\"s name\n", - " value=lb_types.Mask(mask=mask_data_2, color=(255, 255, 255)),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_free_text\",\n", - " value=lb_types.Text(answer=\"free text answer\"))\n", - " ],\n", - ")\n", - "\n", - "point_annotation = lb_types.ObjectAnnotation(\n", - " name=\"point\",\n", - " value=lb_types.Point(x=1166.606, y=1441.768),\n", - ")\n", - "\n", - "polyline_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polyline\",\n", - " value=lb_types.Line(points=[\n", - " 
lb_types.Point(x=2534.353, y=249.471),\n", - " lb_types.Point(x=2429.492, y=182.092),\n", - " lb_types.Point(x=2294.322, y=221.962),\n", - " lb_types.Point(x=2224.491, y=180.463),\n", - " lb_types.Point(x=2136.123, y=204.716),\n", - " lb_types.Point(x=1712.247, y=173.949),\n", - " lb_types.Point(x=1703.838, y=84.438),\n", - " lb_types.Point(x=1579.772, y=82.61),\n", - " lb_types.Point(x=1583.442, y=167.552),\n", - " lb_types.Point(x=1478.869, y=164.903),\n", - " lb_types.Point(x=1418.941, y=318.149),\n", - " lb_types.Point(x=1243.128, y=400.815),\n", - " lb_types.Point(x=1022.067, y=319.007),\n", - " lb_types.Point(x=892.367, y=379.216),\n", - " lb_types.Point(x=670.273, y=364.408),\n", - " lb_types.Point(x=613.114, y=288.16),\n", - " lb_types.Point(x=377.559, y=238.251),\n", - " lb_types.Point(x=368.087, y=185.064),\n", - " lb_types.Point(x=246.557, y=167.286),\n", - " lb_types.Point(x=236.648, y=285.61),\n", - " lb_types.Point(x=90.929, y=326.412),\n", - " ]),\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\nlabel = []\nannotations = [\n radio_annotation,\n nested_radio_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n polygon_annotation,\n mask_annotation,\n mask_with_text_subclass_annotation,\n point_annotation,\n polyline_annotation,\n]\nlabel.append(\n lb_types.Label(data={\"global_key\": global_key}, annotations=annotations))", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", - "label = []\n", - "annotations = [\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " checklist_annotation,\n", - " nested_checklist_annotation,\n", - " text_annotation,\n", - " bbox_annotation,\n", - " bbox_with_radio_subclass_annotation,\n", - " polygon_annotation,\n", - " mask_annotation,\n", - " mask_with_text_subclass_annotation,\n", - " point_annotation,\n", - " polyline_annotation,\n", - "]\n", - "label.append(\n", - " lb_types.Label(data={\"global_key\": global_key}, annotations=annotations))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. 
Upload annotations to the project using Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"annotation_import_\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_annotation = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"annotation_import_\" + str(uuid.uuid4()),\n", - " labels=label,\n", - ")\n", - "\n", - "upload_job_annotation.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_annotation.errors)\n", - "print(\"Status of uploads: \", upload_job_annotation.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6 Send the annotations to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# get the annotations from the project and add them to the model\nmodel_run.upsert_labels(project_id=project.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "# get the annotations from the project and add them to the model\n", - "model_run.upsert_labels(project_id=project.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# 
dataset.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/examples/prediction_upload/pdf_predictions.ipynb b/examples/prediction_upload/pdf_predictions.ipynb index 83c168fcd..f1d2637e1 100644 --- a/examples/prediction_upload/pdf_predictions.ipynb +++ b/examples/prediction_upload/pdf_predictions.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 2, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,17 +24,17 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Prediction Import " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "*Annotation types*\n", @@ -51,419 +53,115 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import uuid\nimport json\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "import json\n", - "import requests\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API key" - ] + ], + 
"cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Entity ##########\n\n# Annotation Types\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n confidence=0.5,\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_prediction_ndjson = {\n \"name\":\n \"named_entity\",\n \"confidence\":\n 0.5,\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n \"groupId\": \"\",\n \"page\": 1,\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Entity ##########\n", - "\n", - "# Annotation Types\n", - "entities_prediction = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " confidence=0.5,\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "# NDJSON\n", - "entities_prediction_ndjson = {\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\",],\n", - " \"groupId\": \"\",\n", - " \"page\": 1,\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########### Radio Classification #########\n\n# Annotation types\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n)\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########### Radio Classification #########\n", - "\n", - "# Annotation types\n", - "radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\", confidence=0.5)),\n", - ")\n", - "# NDJSON\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Checklist Classification ###########\n", - "\n", - "# Annotation types\n", - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5),\n", - " 
lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"checklist_question\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ Bounding Box ###########\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim_1[\"left\"],\n y=bbox_dim_1[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n ), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": bbox_dim_1,\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Bounding Box ###########\n", - "\n", - "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", - "bbox_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\", # must match your ontology feature\"s name\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", - " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", - " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " page=0,\n", - " 
unit=lb_types.RectangleUnit.POINTS,\n", - " ),\n", - ")\n", - "\n", - "bbox_prediction_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"bbox\": bbox_dim_1,\n", - " \"page\": 0,\n", - " \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# ############ global nested classifications ###########\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n },\n }],\n }],\n}\n\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n 
\"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# ############ global nested classifications ###########\n", - "\n", - "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=\n", - " 0.5, # Confidence scores should be added to the answer\n", - " )\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " },\n", - " }],\n", - " }],\n", - "}\n", - "\n", - "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " 
lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=\n", - " 0.5, # Confidence scores should be added to the answer\n", - " )),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_prediction_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " }],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############## Classification Free-form text ##############\n\ntext_prediction = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n)\n\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############## Classification Free-form text ##############\n", - "\n", - "text_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature\"s name\n", - " value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n", - ")\n", - "\n", - "text_prediction_ndjson = {\n", - " \"name\": \"free_text\",\n", - " \"answer\": \"sample text\",\n", - " \"confidence\": 0.5,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### BBOX with nested classifications #########\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 
249.386,\n}\n\nbbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\",\n confidence=0.5,\n )),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\",\n \"confidence\": 0.5,\n },\n }],\n },\n }],\n \"bbox\": bbox_dim,\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### BBOX with nested classifications #########\n", - "\n", - "bbox_dim = {\n", - " \"top\": 226.757,\n", - " \"left\": 317.271,\n", - " \"height\": 194.229,\n", - " \"width\": 249.386,\n", - "}\n", - "\n", - "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " confidence=0.5,\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim[\"left\"],\n", - " y=bbox_dim[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim[\"left\"] + 
bbox_dim[\"width\"],\n", - " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=0.5,\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"second_sub_radio_question\",\n", - " value=lb_types.Radio(\n", - " answer=lb_types.ClassificationAnswer(\n", - " name=\"second_sub_radio_answer\",\n", - " confidence=0.5,\n", - " )),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "bbox_with_radio_subclass_prediction_ndjson = {\n", - " \"name\": \"bbox_with_radio_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"second_sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"second_sub_radio_answer\",\n", - " \"confidence\": 0.5,\n", - " },\n", - " }],\n", - " },\n", - " }],\n", - " \"bbox\": bbox_dim,\n", - " \"page\": 1,\n", - " \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ NER with nested classifications ########\n\nner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n value=lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n text_selections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n 
lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n confidence=0.5)\n ]),\n )\n ],\n)\n\nner_with_checklist_subclass_prediction_ndjson = {\n \"name\":\n \"ner_with_checklist_subclass\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": [{\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5\n }],\n }],\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ NER with nested classifications ########\n", - "\n", - "ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " confidence=0.5,\n", - " value=lb_types.DocumentEntity(\n", - " name=\"ner_with_checklist_subclass\",\n", - " text_selections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n", - " confidence=0.5)\n", - " ]),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "ner_with_checklist_subclass_prediction_ndjson = {\n", - " \"name\":\n", - " \"ner_with_checklist_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " }],\n", - " }],\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\"],\n", - " \"groupId\": \"\",\n", - " \"page\": 1\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Passing a `text_layer_url` is not longer required. 
Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", @@ -477,200 +175,60 @@ "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. As a result, all tokens in the text layer are also rotated by 90 degrees.\n", "\n", "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "global_key = \"0801.3483.pdf\" + str(uuid.uuid4())\nimg_url = {\n \"row_data\": {\n \"pdf_url\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n },\n \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "global_key = \"0801.3483.pdf\" + str(uuid.uuid4())\n", - "img_url = {\n", - " \"row_data\": {\n", - " \"pdf_url\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", - " },\n", - " \"global_key\": global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", - "task = dataset.create_data_rows([img_url])\n", - "task.wait_till_done()\n", - "print(f\"Failed data rows: {task.failed_data_rows}\")\n", - "print(f\"Errors: {task.errors}\")\n", - "\n", - "if task.errors:\n", - " for error in task.errors:\n", - " if (\"Duplicate global key\" in 
error[\"message\"] and\n", - " dataset.row_count == 0):\n", - " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", - " print(f\"Deleting empty dataset: {dataset}\")\n", - " dataset.delete()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n 
class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "## Setup the ontology and link the tools created above.\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " 
class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " tools=[ # List of Tool objects\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", - " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.NER,\n", - " name=\"ner_with_checklist_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " 
name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_sub_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"second_sub_radio_question\",\n", - " options=[\n", - " lb.Option(\"second_sub_radio_answer\")\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Document Annotation Import Demo\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Document,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# create Model\nmodel = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create Model\n", - "model = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid)\n", - "# create Model Run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the predictions 
payload\n", @@ -679,508 +237,184 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting payload should have exactly the same content for annotations that are supported by both" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "To extract the generated text layer url we first need to export the data row" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "client.enable_experimental = True\ntask = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = json.loads(output.json_str)\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)", + "cell_type": "code", "outputs": [], - "source": [ - "client.enable_experimental = True\n", - "task = lb.DataRow.export(client=client, global_keys=[global_key])\n", - "task.wait_till_done()\n", - "stream = task.get_stream()\n", - "\n", - "text_layer = \"\"\n", - "for output in stream:\n", - " output_json = json.loads(output.json_str)\n", - " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n", - "print(text_layer)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n \"tokenIds\": list_tokens,\n \"page\": 
page\n }]\n })\n\n\n# Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n \"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. Kobayashi\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n\n# re-write the entity annotation with text selections\nentities_prediction_document_entity = lb_types.DocumentEntity(\n name=\"named_entity\", confidence=0.5, textSelections=text_selections)\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\", value=entities_prediction_document_entity)\n\n# re-write the entity annotation + 
subclassification with text selections\nclassifications = [\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n confidence=0.5)\n ]),\n )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n textSelections=text_selections_ner,\n)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n value=ner_annotation_with_subclass,\n classifications=classifications,\n)\n\n# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\nprint(f\"entities_annotation={entities_prediction}\")\nprint(\n f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")", + "cell_type": "code", "outputs": [], - "source": [ - "# Helper method\n", - "def update_text_selections(annotation, group_id, list_tokens, page):\n", - " return annotation.update({\n", - " \"textSelections\": [{\n", - " \"groupId\": group_id,\n", - " \"tokenIds\": list_tokens,\n", - " \"page\": page\n", - " }]\n", - " })\n", - "\n", - "\n", - "# Fetch the content of the text layer\n", - "res = requests.get(text_layer)\n", - "\n", - "# Phrases that we want to annotation obtained from the text layer url\n", - "content_phrases = [\n", - " \"Metal-insulator (MI) transitions have been one of the\",\n", - " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", - "]\n", - "\n", - "# Parse the text layer\n", - "text_selections = []\n", - "text_selections_ner = []\n", - "\n", - "for obj in json.loads(res.text):\n", - " for group in obj[\"groups\"]:\n", - " if group[\"content\"] == content_phrases[0]:\n", - " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " document_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", - " text_selections.append(document_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=entities_prediction_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[1]:\n", - " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " ner_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", - " text_selections_ner.append(ner_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=ner_with_checklist_subclass_prediction_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens_2, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - "\n", - "# re-write the entity annotation with text selections\n", - "entities_prediction_document_entity = lb_types.DocumentEntity(\n", - " name=\"named_entity\", confidence=0.5, textSelections=text_selections)\n", - "entities_prediction = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\", value=entities_prediction_document_entity)\n", - "\n", - "# re-write the entity annotation + subclassification 
with text selections\n", - "classifications = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n", - " confidence=0.5)\n", - " ]),\n", - " )\n", - "]\n", - "ner_annotation_with_subclass = lb_types.DocumentEntity(\n", - " name=\"ner_with_checklist_subclass\",\n", - " confidence=0.5,\n", - " textSelections=text_selections_ner,\n", - ")\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " confidence=0.5,\n", - " value=ner_annotation_with_subclass,\n", - " classifications=classifications,\n", - ")\n", - "\n", - "# Final NDJSON and python annotations\n", - "print(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\n", - "print(f\"entities_annotation={entities_prediction}\")\n", - "print(\n", - " f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n", - ")\n", - "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Python annotation \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_predictions = []\n\nlabel_predictions.append(\n lb_types.Label(\n data=lb_types.DocumentData(global_key=global_key),\n annotations=[\n entities_prediction,\n checklist_prediction,\n nested_checklist_prediction,\n text_prediction,\n radio_prediction,\n nested_radio_prediction,\n bbox_prediction,\n bbox_with_radio_subclass_prediction,\n ner_with_checklist_subclass_prediction,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label_predictions = []\n", - "\n", - "label_predictions.append(\n", - " lb_types.Label(\n", - " data=lb_types.DocumentData(global_key=global_key),\n", - " annotations=[\n", - " 
entities_prediction,\n", - " checklist_prediction,\n", - " nested_checklist_prediction,\n", - " text_prediction,\n", - " radio_prediction,\n", - " nested_radio_prediction,\n", - " bbox_prediction,\n", - " bbox_with_radio_subclass_prediction,\n", - " ner_with_checklist_subclass_prediction,\n", - " ],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON: " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_predictions_ndjson = []\nfor annot in [\n entities_prediction_ndjson,\n checklist_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n text_prediction_ndjson,\n radio_prediction_ndjson,\n nested_radio_prediction_ndjson,\n bbox_prediction_ndjson,\n bbox_with_radio_subclass_prediction_ndjson,\n ner_with_checklist_subclass_prediction_ndjson,\n]:\n annot.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_predictions_ndjson.append(annot)", + "cell_type": "code", "outputs": [], - "source": [ - "label_predictions_ndjson = []\n", - "for annot in [\n", - " entities_prediction_ndjson,\n", - " checklist_prediction_ndjson,\n", - " nested_checklist_prediction_ndjson,\n", - " text_prediction_ndjson,\n", - " radio_prediction_ndjson,\n", - " nested_radio_prediction_ndjson,\n", - " bbox_prediction_ndjson,\n", - " bbox_with_radio_subclass_prediction_ndjson,\n", - " ner_with_checklist_subclass_prediction_ndjson,\n", - "]:\n", - " annot.update({\n", - " \"dataRow\": {\n", - " \"globalKey\": global_key\n", - " },\n", - " })\n", - " label_predictions_ndjson.append(annot)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6: Upload the predictions payload to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = 
model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_predictions,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_predictions,\n", - ")\n", - "\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run\n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.1 Create a labelbox project \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project = client.create_project(name=\"Document Prediction Import Demo\",\n media_type=lb.MediaType.Document)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "project = client.create_project(name=\"Document Prediction Import Demo\",\n", - " media_type=lb.MediaType.Document)\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.2 Create a batch to send to the project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.3 Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "entities_annotation = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(name=\"named_entity\",\n textSelections=text_selections),\n)\n\nradio_annotation = lb_types.ClassificationAnnotation(\n 
name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim_1[\"left\"],\n y=bbox_dim_1[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n ), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",)\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",)),\n )\n ],\n )),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 
249.386,\n}\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n text_selections=text_selections_ner),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)", + "cell_type": "code", "outputs": [], - "source": [ - "entities_annotation = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(name=\"named_entity\",\n", - " textSelections=text_selections),\n", - ")\n", - "\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - ")\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " 
lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\", # must match your ontology feature\"s name\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", - " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", - " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " page=0,\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " ),\n", - ")\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",)\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",)),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", - "\n", - "bbox_dim = {\n", - " \"top\": 226.757,\n", - " \"left\": 317.271,\n", - " \"height\": 
194.229,\n", - " \"width\": 249.386,\n", - "}\n", - "\n", - "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim[\"left\"],\n", - " y=bbox_dim[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n", - " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"second_sub_radio_question\",\n", - " value=lb_types.Radio(\n", - " answer=lb_types.ClassificationAnswer(\n", - " name=\"second_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n", - " text_selections=text_selections_ner),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.4 Create the label object " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "labels = []\n\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_annotation,\n checklist_annotation,\n 
nested_checklist_annotation,\n text_annotation,\n radio_annotation,\n nested_radio_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n ner_with_checklist_subclass_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "labels = []\n", - "\n", - "labels.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " entities_annotation,\n", - " checklist_annotation,\n", - " nested_checklist_annotation,\n", - " text_annotation,\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " bbox_annotation,\n", - " bbox_with_radio_subclass_annotation,\n", - " ner_with_checklist_subclass_annotation,\n", - " ],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.5 Upload annotations to the project using Label import\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"text_label_import_job\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_annotation = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"text_label_import_job\" + str(uuid.uuid4()),\n", - " labels=labels,\n", - ")\n", - "\n", - "upload_job_annotation.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_annotation.errors)\n", - "print(\"Status of uploads: \", upload_job_annotation.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.6 Send the 
annotations to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "# get the labels id from the project\n", - "model_run.upsert_labels(project_id=project.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Option deletions for cleanup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + ] +} \ No newline at end of file diff --git a/examples/prediction_upload/text_predictions.ipynb b/examples/prediction_upload/text_predictions.ipynb index aba84a546..7e4cd048e 100644 --- a/examples/prediction_upload/text_predictions.ipynb +++ b/examples/prediction_upload/text_predictions.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Text Prediction Import\n", @@ -48,392 +50,145 @@ "\n", "A Model Run is a container for the predictions, annotations and metrics of a specific experiment in your ML model development cycle.\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - 
"execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "import uuid" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Entities ##########\n\n# Python annotation\nnamed_entity = lb_types.TextEntity(start=10, end=20)\nentities_prediction = lb_types.ObjectAnnotation(value=named_entity,\n name=\"named_entity\",\n confidence=0.5)\n\n# NDJSON\nentities_prediction_ndjson = {\n \"name\": \"named_entity\",\n \"confidence\": 0.5,\n \"location\": {\n \"start\": 10,\n \"end\": 20\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Entities ##########\n", - "\n", - "# Python annotation\n", - "named_entity = lb_types.TextEntity(start=10, end=20)\n", - "entities_prediction = lb_types.ObjectAnnotation(value=named_entity,\n", - " name=\"named_entity\",\n", - " confidence=0.5)\n", - "\n", - "# NDJSON\n", - "entities_prediction_ndjson = {\n", - " \"name\": 
\"named_entity\",\n", - " \"confidence\": 0.5,\n", - " \"location\": {\n", - " \"start\": 10,\n", - " \"end\": 20\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classification - Radio (single choice ) ##########\n\n# Python annotation\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Classification - Radio (single choice ) ##########\n", - "\n", - "# Python annotation\n", - "radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\", confidence=0.5)),\n", - ")\n", - "\n", - "# NDJSON\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classification - Radio and Checklist (with subclassifcations) ##########\n\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\": 
\"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5,\n },\n }],\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Classification - Radio and Checklist (with subclassifcations) ##########\n", - "\n", - "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\", confidence=0.5)),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_prediction_ndjson = {\n", - " 
\"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " }],\n", - " },\n", - "}\n", - "\n", - "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=0.5,\n", - " )\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " \"confidence\": 0.5,\n", - " },\n", - " }],\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Checklist ##########\n\n# Python annotation\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n 
lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\": \"checklist_question\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Checklist ##########\n", - "\n", - "# Python annotation\n", - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"third_checklist_answer\",\n", - " confidence=0.5),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_prediction_ndjson = {\n", - " \"name\": \"checklist_question\",\n", - " \"answer\": [{\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classification Free-Form text ##########\n\n# Python annotation\ntext_prediction = lb_types.ClassificationAnnotation(name=\"free_text\",\n value=lb_types.Text(\n answer=\"sample text\",\n confidence=0.5))\n\n# NDJSON\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Classification Free-Form text ##########\n", - "\n", - "# Python annotation\n", - "text_prediction = lb_types.ClassificationAnnotation(name=\"free_text\",\n", - " value=lb_types.Text(\n", - " answer=\"sample text\",\n", - " confidence=0.5))\n", - "\n", - "# NDJSON\n", - "text_prediction_ndjson = {\n", - " \"name\": 
\"free_text\",\n", - " \"answer\": \"sample text\",\n", - " \"confidence\": 0.5,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# send a sample image as batch to the project\nglobal_key = \"lorem-ipsum.txt\" + str(uuid.uuid4())\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt\",\n \"global_key\":\n global_key,\n}\ndataset = client.create_dataset(\n name=\"text prediction demo dataset\",\n iam_integration=\n None, # Removing this argument will default to the organziation's default iam integration\n)\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "# send a sample image as batch to the project\n", - "global_key = \"lorem-ipsum.txt\" + str(uuid.uuid4())\n", - "test_img_url = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "dataset = client.create_dataset(\n", - " name=\"text prediction demo dataset\",\n", - " iam_integration=\n", - " None, # Removing this argument will default to the organziation's default iam integration\n", - ")\n", - "task = dataset.create_data_rows([test_img_url])\n", - "task.wait_till_done()\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows:\", task.failed_data_rows)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the 
tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[lb.Option(value=\"first_radio_answer\")],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n value=\"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n lb.Option(value=\"third_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\")\n ],\n)\n\nontology = client.create_ontology(\n \"Ontology Text Predictions\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Text,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "## Setup the ontology and link the tools created above.\n", - "\n", - 
"ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " options=[lb.Option(value=\"first_radio_answer\")],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", - " ),\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " lb.Option(value=\"third_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\"),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " tools=[ # List of Tool objects\n", - " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\")\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Ontology Text Predictions\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Text,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model 
and Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# create Model\nmodel = client.create_model(name=\"text_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create Model\n", - "model = client.create_model(name=\"text_model_run_\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid)\n", - "# create Model Run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", @@ -443,293 +198,149 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. 
Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting label_ndjson should have exactly the same content for annotations that are supported by both" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label for predictions\nlabel_predictions = []\nlabel_predictions.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_prediction,\n nested_radio_prediction,\n radio_prediction,\n checklist_prediction,\n nested_checklist_prediction,\n text_prediction,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label for predictions\n", - "label_predictions = []\n", - "label_predictions.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " entities_prediction,\n", - " nested_radio_prediction,\n", - " radio_prediction,\n", - " checklist_prediction,\n", - " nested_checklist_prediction,\n", - " text_prediction,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "If using NDJSON: " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson_predictions = []\nfor annot in [\n entities_prediction_ndjson,\n radio_prediction_ndjson,\n checklist_prediction_ndjson,\n text_prediction_ndjson,\n nested_radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n]:\n annot.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson_predictions.append(annot)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson_predictions = []\n", - "for annot in [\n", - " entities_prediction_ndjson,\n", - " radio_prediction_ndjson,\n", - " checklist_prediction_ndjson,\n", - " text_prediction_ndjson,\n", - " nested_radio_prediction_ndjson,\n", - " nested_checklist_prediction_ndjson,\n", - 
"]:\n", - " annot.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson_predictions.append(annot)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "## Step 6. Upload the predictions payload to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_predictions,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_predictions,\n", - ")\n", - "\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run \n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. 
Create a labelbox project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"Text Prediction Import Demo\",\n media_type=lb.MediaType.Text)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Labelbox project\n", - "project = client.create_project(name=\"Text Prediction Import Demo\",\n", - " media_type=lb.MediaType.Text)\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. Create a batch to send to the project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "named_entity = lb_types.TextEntity(start=10, end=20)\nentities_annotation = lb_types.ObjectAnnotation(value=named_entity,\n name=\"named_entity\")\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n ]),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))", + "cell_type": "code", "outputs": [], - "source": [ - "named_entity = lb_types.TextEntity(start=10, end=20)\n", - "entities_annotation = lb_types.ObjectAnnotation(value=named_entity,\n", - " name=\"named_entity\")\n", - "\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - ")\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"third_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n# Create a Label for predictions\nlabel = []\nlabel.append(\n lb_types.Label(\n data=lb_types.TextData(global_key=global_key),\n annotations=[\n entities_annotation,\n nested_radio_annotation,\n radio_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", - "# Create a Label for predictions\n", - "label = []\n", - "label.append(\n", - " lb_types.Label(\n", - " data=lb_types.TextData(global_key=global_key),\n", - " annotations=[\n", - " entities_annotation,\n", - " nested_radio_annotation,\n", - " radio_annotation,\n", - " checklist_annotation,\n", - " nested_checklist_annotation,\n", - " text_annotation,\n", - " ],\n", - " ))" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "##### 7.5. 
Upload annotations to the project using Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"text_label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_annotation = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"text_label_import_job\" + str(uuid.uuid4()),\n", - " labels=label,\n", - ")\n", - "\n", - "upload_job_annotation.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_annotation.errors)\n", - "print(\"Status of uploads: \", upload_job_annotation.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6 Send the annotations to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "# get the labels id from the project\n", - "model_run.upsert_labels(project_id=project.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] - } - ], - "metadata": { - 
"language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/examples/prediction_upload/video_predictions.ipynb b/examples/prediction_upload/video_predictions.ipynb index 62ea29567..63fe579bd 100644 --- a/examples/prediction_upload/video_predictions.ipynb +++ b/examples/prediction_upload/video_predictions.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Video Prediction Import \n", @@ -47,1450 +49,328 @@ "- Raster segmentation masks [not supported in model]\n", "- Vector segmentation masks [not supported in video editor]\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "import uuid" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API Key \n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + 
"source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions\n", "- Confidence scores are currently not supported for segment or frame annotations, which are required for bounding box, point, and line for video assets. For this tutorial, only the radio and checklist annotations will have confidence scores." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Bounding box (frame specific) ###########\n\n# Confidence scores are not supported for frame specific bounding box annotations and VideoObjectAnnotation\n\n# bbox dimensions\nbbox_dm = {\"top\": 617, \"left\": 1371, \"height\": 419, \"width\": 505}\n\n# Python Annotation\nbbox_prediction = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"],\n y=bbox_dm[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=15,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=19,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n]\n\n# 
NDJSON\nbbox_prediction_ndjson = {\n \"name\":\n \"bbox_video\",\n \"segments\": [{\n \"keyframes\": [\n {\n \"frame\": 13,\n \"bbox\": bbox_dm\n },\n {\n \"frame\": 15,\n \"bbox\": bbox_dm\n },\n {\n \"frame\": 19,\n \"bbox\": bbox_dm\n },\n ]\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Bounding box (frame specific) ###########\n", - "\n", - "# Confidence scores are not supported for frame specific bounding box annotations and VideoObjectAnnotation\n", - "\n", - "# bbox dimensions\n", - "bbox_dm = {\"top\": 617, \"left\": 1371, \"height\": 419, \"width\": 505}\n", - "\n", - "# Python Annotation\n", - "bbox_prediction = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - " frame=13,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm[\"left\"],\n", - " y=bbox_dm[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", - " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " ),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - " frame=15,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", - " end=lb_types.Point(\n", - " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", - " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", - " ),\n", - " ),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - " frame=19,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", - " end=lb_types.Point(\n", - " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", - " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", - " ),\n", - " ),\n", - " ),\n", - "]\n", - "\n", - "# NDJSON\n", - "bbox_prediction_ndjson = {\n", 
- " \"name\":\n", - " \"bbox_video\",\n", - " \"segments\": [{\n", - " \"keyframes\": [\n", - " {\n", - " \"frame\": 13,\n", - " \"bbox\": bbox_dm\n", - " },\n", - " {\n", - " \"frame\": 15,\n", - " \"bbox\": bbox_dm\n", - " },\n", - " {\n", - " \"frame\": 19,\n", - " \"bbox\": bbox_dm\n", - " },\n", - " ]\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######## Point ########\n# Confidence score is not supported for VideoObjectAnnotation\n# Python Annotation\npoint_prediction = [\n lb_types.VideoObjectAnnotation(\n name=\"point_video\",\n keyframe=True,\n frame=17,\n value=lb_types.Point(x=660.134, y=407.926),\n )\n]\n\n# NDJSON\npoint_prediction_ndjson = {\n \"name\":\n \"point_video\",\n \"confidence\":\n 0.5,\n \"segments\": [{\n \"keyframes\": [{\n \"frame\": 17,\n \"point\": {\n \"x\": 660.134,\n \"y\": 407.926\n }\n }]\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######## Point ########\n", - "# Confidence score is not supported for VideoObjectAnnotation\n", - "# Python Annotation\n", - "point_prediction = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"point_video\",\n", - " keyframe=True,\n", - " frame=17,\n", - " value=lb_types.Point(x=660.134, y=407.926),\n", - " )\n", - "]\n", - "\n", - "# NDJSON\n", - "point_prediction_ndjson = {\n", - " \"name\":\n", - " \"point_video\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"segments\": [{\n", - " \"keyframes\": [{\n", - " \"frame\": 17,\n", - " \"point\": {\n", - " \"x\": 660.134,\n", - " \"y\": 407.926\n", - " }\n", - " }]\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######## Polyline (frame specific) ########\n# confidence scores are not supported in polyline annotations\n\n# Python Annotation\npolyline_prediction = [\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n 
frame=5,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=12,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=20,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=24,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=45,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n]\n\n# NDJSON\npolyline_prediction_ndjson = {\n \"name\":\n \"line_video_frame\",\n \"segments\": [\n {\n \"keyframes\": [\n {\n \"frame\":\n 5,\n \"line\": [\n {\n \"x\": 680,\n \"y\": 100\n },\n {\n \"x\": 100,\n \"y\": 190\n },\n {\n \"x\": 190,\n \"y\": 220\n },\n ],\n },\n {\n \"frame\":\n 12,\n \"line\": [\n {\n \"x\": 680,\n \"y\": 280\n },\n {\n \"x\": 300,\n \"y\": 380\n },\n {\n \"x\": 400,\n \"y\": 460\n },\n ],\n },\n {\n \"frame\":\n 20,\n \"line\": [\n {\n \"x\": 680,\n \"y\": 180\n },\n {\n \"x\": 100,\n \"y\": 200\n },\n {\n \"x\": 200,\n \"y\": 260\n },\n ],\n },\n ]\n },\n {\n \"keyframes\": [\n {\n \"frame\": 24,\n \"line\": [{\n \"x\": 300,\n \"y\": 310\n }, {\n \"x\": 330,\n \"y\": 430\n }],\n },\n {\n \"frame\": 45,\n \"line\": [{\n \"x\": 600,\n \"y\": 810\n }, {\n \"x\": 900,\n \"y\": 930\n }],\n },\n ]\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######## Polyline (frame specific) ########\n", - "# confidence scores are not supported in polyline 
annotations\n", - "\n", - "# Python Annotation\n", - "polyline_prediction = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=5,\n", - " segment_index=0,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=12,\n", - " segment_index=0,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=20,\n", - " segment_index=0,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=24,\n", - " segment_index=1,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=45,\n", - " segment_index=1,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - "]\n", - "\n", - "# NDJSON\n", - "polyline_prediction_ndjson = {\n", - " \"name\":\n", - " \"line_video_frame\",\n", - " \"segments\": [\n", - " {\n", - " \"keyframes\": [\n", - " {\n", - " \"frame\":\n", - " 5,\n", - " \"line\": [\n", - " {\n", - " \"x\": 680,\n", - " \"y\": 100\n", - " },\n", - " {\n", - " \"x\": 100,\n", - " \"y\": 190\n", - " },\n", - " {\n", - " \"x\": 190,\n", - " \"y\": 220\n", - " },\n", - " ],\n", - " },\n", - " {\n", - " \"frame\":\n", - " 12,\n", - " \"line\": [\n", - " {\n", - " \"x\": 680,\n", - " \"y\": 280\n", - " },\n", - " {\n", - " \"x\": 
300,\n", - " \"y\": 380\n", - " },\n", - " {\n", - " \"x\": 400,\n", - " \"y\": 460\n", - " },\n", - " ],\n", - " },\n", - " {\n", - " \"frame\":\n", - " 20,\n", - " \"line\": [\n", - " {\n", - " \"x\": 680,\n", - " \"y\": 180\n", - " },\n", - " {\n", - " \"x\": 100,\n", - " \"y\": 200\n", - " },\n", - " {\n", - " \"x\": 200,\n", - " \"y\": 260\n", - " },\n", - " ],\n", - " },\n", - " ]\n", - " },\n", - " {\n", - " \"keyframes\": [\n", - " {\n", - " \"frame\": 24,\n", - " \"line\": [{\n", - " \"x\": 300,\n", - " \"y\": 310\n", - " }, {\n", - " \"x\": 330,\n", - " \"y\": 430\n", - " }],\n", - " },\n", - " {\n", - " \"frame\": 45,\n", - " \"line\": [{\n", - " \"x\": 600,\n", - " \"y\": 810\n", - " }, {\n", - " \"x\": 900,\n", - " \"y\": 930\n", - " }],\n", - " },\n", - " ]\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######## Frame base classifications ########\n\n# Python Annotation\nradio_prediction = [\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=9,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=15,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n ),\n]\n\nchecklist_prediction = [\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=29,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5)\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=35,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5)\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n 
name=\"checklist_class\",\n frame=39,\n segment_index=1,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5)\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=45,\n segment_index=1,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5)\n ]),\n ),\n]\n\n## NDJSON\nframe_radio_classification_prediction_ndjson = {\n \"name\": \"radio_class\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"frames\": [{\n \"start\": 9,\n \"end\": 15\n }],\n },\n}\n\n## frame specific\nframe_checklist_classification_prediction_ndjson = {\n \"name\":\n \"checklist_class\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"frames\": [{\n \"start\": 29,\n \"end\": 35\n }],\n },\n {\n \"name\": \"second_checklist_answer\",\n \"frames\": [{\n \"start\": 39,\n \"end\": 45\n }],\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######## Frame base classifications ########\n", - "\n", - "# Python Annotation\n", - "radio_prediction = [\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"radio_class\",\n", - " frame=9,\n", - " segment_index=0,\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\", confidence=0.5)),\n", - " ),\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"radio_class\",\n", - " frame=15,\n", - " segment_index=0,\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\", confidence=0.5)),\n", - " ),\n", - "]\n", - "\n", - "checklist_prediction = [\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " frame=29,\n", - " segment_index=0,\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5)\n", - " ]),\n", - " ),\n", - " 
lb_types.VideoClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " frame=35,\n", - " segment_index=0,\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5)\n", - " ]),\n", - " ),\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " frame=39,\n", - " segment_index=1,\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5)\n", - " ]),\n", - " ),\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " frame=45,\n", - " segment_index=1,\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5)\n", - " ]),\n", - " ),\n", - "]\n", - "\n", - "## NDJSON\n", - "frame_radio_classification_prediction_ndjson = {\n", - " \"name\": \"radio_class\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\",\n", - " \"frames\": [{\n", - " \"start\": 9,\n", - " \"end\": 15\n", - " }],\n", - " },\n", - "}\n", - "\n", - "## frame specific\n", - "frame_checklist_classification_prediction_ndjson = {\n", - " \"name\":\n", - " \"checklist_class\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\",\n", - " \"frames\": [{\n", - " \"start\": 29,\n", - " \"end\": 35\n", - " }],\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\",\n", - " \"frames\": [{\n", - " \"start\": 39,\n", - " \"end\": 45\n", - " }],\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "####### Global Classifications #########\n\n# Python Annotation\n## For global classifications use ClassificationAnnotation\nglobal_radio_prediction = [\n lb_types.ClassificationAnnotation(\n name=\"radio_class_global\",\n 
value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n )\n]\n\nglobal_checklist_prediction = [\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_global\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n )\n]\n\n# NDJSON\nglobal_radio_classification_ndjson = {\n \"name\": \"radio_class_global\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}\n\nglobal_checklist_classification_ndjson = {\n \"name\":\n \"checklist_class_global\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "####### Global Classifications #########\n", - "\n", - "# Python Annotation\n", - "## For global classifications use ClassificationAnnotation\n", - "global_radio_prediction = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"radio_class_global\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\", confidence=0.5)),\n", - " )\n", - "]\n", - "\n", - "global_checklist_prediction = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"checklist_class_global\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5),\n", - " ]),\n", - " )\n", - "]\n", - "\n", - "# NDJSON\n", - "global_radio_classification_ndjson = {\n", - " \"name\": \"radio_class_global\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - "}\n", - "\n", - "global_checklist_classification_ndjson = 
{\n", - " \"name\":\n", - " \"checklist_class_global\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Nested Global Classification ###########\n\n# Python Annotation\nnested_radio_prediction = [\n lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n )),\n )\n]\n\n# NDJSON\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}\n\n# Python Annotation\nnested_checklist_prediction = [\n lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n )\n ]),\n )\n ],\n )\n ]),\n )\n]\n\n# NDJSON\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n 
\"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5,\n },\n }],\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Nested Global Classification ###########\n", - "\n", - "# Python Annotation\n", - "nested_radio_prediction = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5,\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\", confidence=0.5)),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - "]\n", - "\n", - "# NDJSON\n", - "nested_radio_prediction_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " }],\n", - " },\n", - "}\n", - "\n", - "# Python Annotation\n", - "nested_checklist_prediction = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5,\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=0.5,\n", - " )\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - " )\n", - "]\n", - "\n", - "# NDJSON\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " 
\"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " \"confidence\": 0.5,\n", - " },\n", - " }],\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Classifications under frame base tools ##########\n# Confidence scores are not supported for frame specific bounding box annotations with sub-classifications\n\n# bounding box dimensions\nbbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\n\n# Python Annotation\nframe_bbox_with_checklist_subclass_prediction = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=10,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"],\n y=bbox_dm2[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=11,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5)\n ]),\n )\n ],\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + 
bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ),\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"second_checklist_answer\", confidence=0.5)\n ]),\n )\n ],\n ),\n]\n\nframe_bbox_with_checklist_subclass_prediction_ndjson = {\n \"name\":\n \"bbox_class\",\n \"segments\": [{\n \"keyframes\": [\n {\n \"frame\": 10,\n \"bbox\": bbox_dm2\n },\n {\n \"frame\":\n 11,\n \"bbox\":\n bbox_dm2,\n \"classifications\": [{\n \"name\":\n \"bbox_radio\",\n \"answer\": [{\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5,\n }],\n }],\n },\n {\n \"frame\":\n 13,\n \"bbox\":\n bbox_dm2,\n \"classifications\": [{\n \"name\":\n \"bbox_radio\",\n \"answer\": [{\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5,\n }],\n }],\n },\n ]\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Classifications under frame base tools ##########\n", - "# Confidence scores are not supported for frame specific bounding box annotations with sub-classifications\n", - "\n", - "# bounding box dimensions\n", - "bbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\n", - "\n", - "# Python Annotation\n", - "frame_bbox_with_checklist_subclass_prediction = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_class\",\n", - " keyframe=True,\n", - " frame=10,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm2[\"left\"],\n", - " y=bbox_dm2[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", - " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " ),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_class\",\n", - " keyframe=True,\n", - " frame=11,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " 
start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", - " end=lb_types.Point(\n", - " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", - " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", - " ),\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5)\n", - " ]),\n", - " )\n", - " ],\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_class\",\n", - " keyframe=True,\n", - " frame=13,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm2[\"left\"], y=bbox_dm2[\"top\"]),\n", - " end=lb_types.Point(\n", - " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", - " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", - " ),\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"second_checklist_answer\", confidence=0.5)\n", - " ]),\n", - " )\n", - " ],\n", - " ),\n", - "]\n", - "\n", - "frame_bbox_with_checklist_subclass_prediction_ndjson = {\n", - " \"name\":\n", - " \"bbox_class\",\n", - " \"segments\": [{\n", - " \"keyframes\": [\n", - " {\n", - " \"frame\": 10,\n", - " \"bbox\": bbox_dm2\n", - " },\n", - " {\n", - " \"frame\":\n", - " 11,\n", - " \"bbox\":\n", - " bbox_dm2,\n", - " \"classifications\": [{\n", - " \"name\":\n", - " \"bbox_radio\",\n", - " \"answer\": [{\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5,\n", - " }],\n", - " }],\n", - " },\n", - " {\n", - " \"frame\":\n", - " 13,\n", - " \"bbox\":\n", - " bbox_dm2,\n", - " \"classifications\": [{\n", - " \"name\":\n", - " \"bbox_radio\",\n", - " \"answer\": [{\n", - " \"name\": \"second_checklist_answer\",\n", - " \"confidence\": 0.5,\n", - " }],\n", - " }],\n", - " 
},\n", - " ]\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### Free text classification ###########\ntext_prediction = [\n lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature's name\n value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n )\n]\n\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"confidence\": 0.5,\n \"answer\": \"sample text\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### Free text classification ###########\n", - "text_prediction = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature's name\n", - " value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n", - " )\n", - "]\n", - "\n", - "text_prediction_ndjson = {\n", - " \"name\": \"free_text\",\n", - " \"confidence\": 0.5,\n", - " \"answer\": \"sample text\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# send a sample image as batch to the project\nglobal_key = \"sample-video-2.mp4\" + str(uuid.uuid4())\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\",\n \"global_key\":\n global_key,\n}\ndataset = client.create_dataset(\n name=\"Video prediction demo\",\n iam_integration=\n None, # Removing this argument will default to the organziation's default iam integration\n)\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\nprint(\"Errors: \", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "# send a sample image as batch to the project\n", - "global_key = \"sample-video-2.mp4\" + 
str(uuid.uuid4())\n", - "test_img_url = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "dataset = client.create_dataset(\n", - " name=\"Video prediction demo\",\n", - " iam_integration=\n", - " None, # Removing this argument will default to the organziation's default iam integration\n", - ")\n", - "task = dataset.create_data_rows([test_img_url])\n", - "task.wait_till_done()\n", - "print(\"Errors: \", task.errors)\n", - "print(\"Failed data rows: \", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(\n tools=[\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_video\"),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point_video\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_video_frame\"),\n lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"video_mask\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_class\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class\",\n scope=lb.Classification.Scope.\n INDEX, ## defined scope for frame classifications\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n )\n ],\n ),\n ],\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class\",\n 
scope=lb.Classification.Scope.\n INDEX, ## defined scope for frame classifications\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_class\",\n scope=lb.Classification.Scope.INDEX,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_class_global\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_class_global\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Ontology Video Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Video,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(\n", - " tools=[\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bbox_video\"),\n", - " lb.Tool(tool=lb.Tool.Type.POINT, 
name=\"point_video\"),\n", - " lb.Tool(tool=lb.Tool.Type.LINE, name=\"line_video_frame\"),\n", - " lb.Tool(tool=lb.Tool.Type.RASTER_SEGMENTATION, name=\"video_mask\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_class\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_class\",\n", - " scope=lb.Classification.Scope.\n", - " INDEX, ## defined scope for frame classifications\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_class\",\n", - " scope=lb.Classification.Scope.\n", - " INDEX, ## defined scope for frame classifications\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_class\",\n", - " scope=lb.Classification.Scope.INDEX,\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " 
lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_class_global\",\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_class_global\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\"),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Ontology Video Annotations\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Video,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# create Model\nmodel = client.create_model(name=\"video_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create Model\n", - "model = client.create_model(name=\"video_model_run_\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid)\n", - "# create Model Run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + 
"source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. Create the predictions payload\n", "\n", "Create the annotations payload using the snippets of [code here](https://docs.labelbox.com/reference/import-video-annotations).\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Python Annotation Types" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_predictions = []\nannotations_list = [\n point_prediction,\n bbox_prediction,\n polyline_prediction,\n checklist_prediction,\n radio_prediction,\n nested_radio_prediction,\n nested_checklist_prediction,\n frame_bbox_with_checklist_subclass_prediction,\n global_radio_prediction,\n global_checklist_prediction,\n text_prediction,\n]\n\nflatten_list_annotations = [\n ann for ann_sublist in annotations_list for ann in ann_sublist\n]\n\nlabel_predictions.append(\n lb_types.Label(data={\"global_key\": global_key},\n annotations=flatten_list_annotations))", + "cell_type": "code", "outputs": [], - "source": [ - "label_predictions = []\n", - "annotations_list = [\n", - " point_prediction,\n", - " bbox_prediction,\n", - " polyline_prediction,\n", - " checklist_prediction,\n", - " radio_prediction,\n", - " nested_radio_prediction,\n", - " nested_checklist_prediction,\n", - " frame_bbox_with_checklist_subclass_prediction,\n", - " global_radio_prediction,\n", - " global_checklist_prediction,\n", - " text_prediction,\n", - "]\n", - "\n", - "flatten_list_annotations = [\n", - " ann for ann_sublist in annotations_list for ann in ann_sublist\n", - "]\n", - "\n", - "label_predictions.append(\n", - " lb_types.Label(data={\"global_key\": global_key},\n", - " 
annotations=flatten_list_annotations))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### NDJSON annotations" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\nlabel_prediction_ndjson = []\n\nfor annotation in [\n point_prediction_ndjson,\n bbox_prediction_ndjson,\n polyline_prediction_ndjson,\n frame_checklist_classification_prediction_ndjson,\n frame_radio_classification_prediction_ndjson,\n nested_radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n frame_bbox_with_checklist_subclass_prediction_ndjson,\n global_radio_classification_ndjson,\n global_checklist_classification_ndjson,\n text_prediction_ndjson,\n]:\n annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n label_prediction_ndjson.append(annotation)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", - "label_prediction_ndjson = []\n", - "\n", - "for annotation in [\n", - " point_prediction_ndjson,\n", - " bbox_prediction_ndjson,\n", - " polyline_prediction_ndjson,\n", - " frame_checklist_classification_prediction_ndjson,\n", - " frame_radio_classification_prediction_ndjson,\n", - " nested_radio_prediction_ndjson,\n", - " nested_checklist_prediction_ndjson,\n", - " frame_bbox_with_checklist_subclass_prediction_ndjson,\n", - " global_radio_classification_ndjson,\n", - " global_checklist_classification_ndjson,\n", - " text_prediction_ndjson,\n", - "]:\n", - " annotation.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_prediction_ndjson.append(annotation)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. 
Upload the predictions payload to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_predictions,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_predictions,\n", - ")\n", - "\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run \n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. 
Create a labelbox project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"video_prediction_demo\",\n media_type=lb.MediaType.Video)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Labelbox project\n", - "project = client.create_project(name=\"video_prediction_demo\",\n", - " media_type=lb.MediaType.Video)\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. Create a batch to send to the project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"batch_video_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[global_key\n ], # A list of data rows, data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"batch_video_prediction_demo\", # Each batch in a project must have a unique name\n", - " global_keys=[global_key\n", - " ], # A list of data rows, data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Annotation\npoint_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"point_video\",\n keyframe=True,\n frame=17,\n value=lb_types.Point(x=660.134, y=407.926),\n )\n]\n\n######## Polyline ########\n\n# Python Annotation\npolyline_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=5,\n 
segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=12,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=20,\n segment_index=0,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=24,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"line_video_frame\",\n keyframe=True,\n frame=45,\n segment_index=1,\n value=lb_types.Line(\n points=[lb_types.Point(x=680, y=100),\n lb_types.Point(x=100, y=190)]),\n ),\n]\n\nradio_annotation = [\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=9,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"radio_class\",\n frame=15,\n segment_index=0,\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n ),\n]\n\nchecklist_annotation = [\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=29,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=35,\n segment_index=0,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=39,\n segment_index=1,\n value=lb_types.Checklist(answer=[\n 
lb_types.ClassificationAnswer(name=\"second_checklist_answer\")\n ]),\n ),\n lb_types.VideoClassificationAnnotation(\n name=\"checklist_class\",\n frame=45,\n segment_index=1,\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\")\n ]),\n ),\n]\n\nglobal_radio_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"radio_class_global\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n )\n]\n\nglobal_checklist_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"checklist_class_global\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n )\n]\n\nnested_radio_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n )\n]\n\nnested_checklist_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n )\n]\n\nbbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\nframe_bbox_with_checklist_subclass = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=10,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"],\n y=bbox_dm2[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n 
x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=11,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"],\n y=bbox_dm2[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n ]),\n )\n ],\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_class\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm2[\"left\"],\n y=bbox_dm2[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n ), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"checklist_class\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"second_checklist_answer\")\n ]),\n )\n ],\n ),\n]\n\nbbox_dm = {\"top\": 617, \"left\": 1371, \"height\": 419, \"width\": 505}\nbbox_annotation = [\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=13,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"],\n y=bbox_dm[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ), # x= left + width , y = top + height\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=15,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], 
y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n lb_types.VideoObjectAnnotation(\n name=\"bbox_video\",\n keyframe=True,\n frame=19,\n segment_index=0,\n value=lb_types.Rectangle(\n start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n end=lb_types.Point(\n x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n ),\n ),\n ),\n]\n\ntext_annotation = [\n lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature's name\n value=lb_types.Text(answer=\"sample text\"),\n )\n]", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Annotation\n", - "point_annotation = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"point_video\",\n", - " keyframe=True,\n", - " frame=17,\n", - " value=lb_types.Point(x=660.134, y=407.926),\n", - " )\n", - "]\n", - "\n", - "######## Polyline ########\n", - "\n", - "# Python Annotation\n", - "polyline_annotation = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=5,\n", - " segment_index=0,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=12,\n", - " segment_index=0,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=20,\n", - " segment_index=0,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=24,\n", - " segment_index=1,\n", - " 
value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"line_video_frame\",\n", - " keyframe=True,\n", - " frame=45,\n", - " segment_index=1,\n", - " value=lb_types.Line(\n", - " points=[lb_types.Point(x=680, y=100),\n", - " lb_types.Point(x=100, y=190)]),\n", - " ),\n", - "]\n", - "\n", - "radio_annotation = [\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"radio_class\",\n", - " frame=9,\n", - " segment_index=0,\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - " ),\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"radio_class\",\n", - " frame=15,\n", - " segment_index=0,\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - " ),\n", - "]\n", - "\n", - "checklist_annotation = [\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " frame=29,\n", - " segment_index=0,\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", - " ]),\n", - " ),\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " frame=35,\n", - " segment_index=0,\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", - " ]),\n", - " ),\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " frame=39,\n", - " segment_index=1,\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\")\n", - " ]),\n", - " ),\n", - " lb_types.VideoClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " frame=45,\n", - " segment_index=1,\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\")\n", - " ]),\n", - " 
),\n", - "]\n", - "\n", - "global_radio_annotation = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"radio_class_global\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - " )\n", - "]\n", - "\n", - "global_checklist_annotation = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"checklist_class_global\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - " )\n", - "]\n", - "\n", - "nested_radio_annotation = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - "]\n", - "\n", - "nested_checklist_annotation = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - " )\n", - "]\n", - "\n", - "bbox_dm2 = {\"top\": 146.0, \"left\": 98.0, \"height\": 382.0, \"width\": 341.0}\n", - "frame_bbox_with_checklist_subclass = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_class\",\n", - " keyframe=True,\n", - " frame=10,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " 
start=lb_types.Point(x=bbox_dm2[\"left\"],\n", - " y=bbox_dm2[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", - " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " ),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_class\",\n", - " keyframe=True,\n", - " frame=11,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm2[\"left\"],\n", - " y=bbox_dm2[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", - " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_class\",\n", - " keyframe=True,\n", - " frame=13,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm2[\"left\"],\n", - " y=bbox_dm2[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dm2[\"left\"] + bbox_dm2[\"width\"],\n", - " y=bbox_dm2[\"top\"] + bbox_dm2[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"checklist_class\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"second_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " ),\n", - "]\n", - "\n", - "bbox_dm = {\"top\": 617, \"left\": 1371, \"height\": 419, \"width\": 505}\n", - "bbox_annotation = [\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - " frame=13,\n", - " 
segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm[\"left\"],\n", - " y=bbox_dm[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", - " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " ),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - " frame=15,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", - " end=lb_types.Point(\n", - " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", - " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", - " ),\n", - " ),\n", - " ),\n", - " lb_types.VideoObjectAnnotation(\n", - " name=\"bbox_video\",\n", - " keyframe=True,\n", - " frame=19,\n", - " segment_index=0,\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=bbox_dm[\"left\"], y=bbox_dm[\"top\"]),\n", - " end=lb_types.Point(\n", - " x=bbox_dm[\"left\"] + bbox_dm[\"width\"],\n", - " y=bbox_dm[\"top\"] + bbox_dm[\"height\"],\n", - " ),\n", - " ),\n", - " ),\n", - "]\n", - "\n", - "text_annotation = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature's name\n", - " value=lb_types.Text(answer=\"sample text\"),\n", - " )\n", - "]" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, - "source": [] + "source": [], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n\nlabels = []\nannotations_list = [\n checklist_annotation,\n radio_annotation,\n bbox_annotation,\n frame_bbox_with_checklist_subclass,\n point_annotation,\n polyline_annotation,\n global_checklist_annotation,\n global_radio_annotation,\n nested_checklist_annotation,\n nested_radio_annotation,\n text_annotation,\n]\n\nflatten_list_annotations = [\n ann for ann_sublist in annotations_list for ann in ann_sublist\n]\n\nlabels.append(\n lb_types.Label(\n data=lb_types.VideoData(global_key=global_key),\n annotations=flatten_list_annotations,\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", - "\n", - "labels = []\n", - "annotations_list = [\n", - " checklist_annotation,\n", - " radio_annotation,\n", - " bbox_annotation,\n", - " frame_bbox_with_checklist_subclass,\n", - " point_annotation,\n", - " polyline_annotation,\n", - " global_checklist_annotation,\n", - " global_radio_annotation,\n", - " nested_checklist_annotation,\n", - " nested_radio_annotation,\n", - " text_annotation,\n", - "]\n", - "\n", - "flatten_list_annotations = [\n", - " ann for ann_sublist in annotations_list for ann in ann_sublist\n", - "]\n", - "\n", - "labels.append(\n", - " lb_types.Label(\n", - " data=lb_types.VideoData(global_key=global_key),\n", - " annotations=flatten_list_annotations,\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. 
Upload annotations to the project using Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"video_annotations_import_\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_annotation = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"video_annotations_import_\" + str(uuid.uuid4()),\n", - " labels=labels,\n", - ")\n", - "\n", - "upload_job_annotation.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_annotation.errors)\n", - "print(\"Status of uploads: \", upload_job_annotation.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6. 
Send the annotations to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "# get the labels id from the project\n", - "model_run.upsert_labels(project_id=project.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/examples/project_configuration/webhooks.ipynb b/examples/project_configuration/webhooks.ipynb index 482abec79..36b6f977b 100644 --- a/examples/project_configuration/webhooks.ipynb +++ b/examples/project_configuration/webhooks.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,115 +24,69 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Webhook Configuration" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Webhooks are supported for the following events:\n", "* label_created\n", "* label_updated\n", "* label_deleted" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q 
\"labelbox[data]\"\n%pip install -q requests\n%pip install -q hmac\n%pip install -q hashlib\n%pip install -q flask\n%pip install -q Werkzeug", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"\n", - "%pip install -q requests\n", - "%pip install -q hmac\n", - "%pip install -q hashlib\n", - "%pip install -q flask\n", - "%pip install -q Werkzeug" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nfrom flask import Flask, request\nimport hmac\nimport hashlib\nimport threading\nfrom werkzeug.serving import run_simple\nimport json\nimport requests\nimport os\nfrom getpass import getpass\nimport socket", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "from flask import Flask, request\n", - "import hmac\n", - "import hashlib\n", - "import threading\n", - "from werkzeug.serving import run_simple\n", - "import json\n", - "import requests\n", - "import os\n", - "from getpass import getpass\n", - "import socket" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# If you don\"t want to give google access to drive you can skip this cell\n# and manually set `API_KEY` below.\n\nCOLAB = \"google.colab\" in str(get_ipython())\nif COLAB:\n %pip install colab-env -qU\n from colab_env import envvar_handler\n\n envvar_handler.envload()\n\nAPI_KEY = os.environ.get(\"LABELBOX_API_KEY\")\nif not os.environ.get(\"LABELBOX_API_KEY\"):\n API_KEY = getpass(\"Please enter your labelbox api key\")\n if COLAB:\n envvar_handler.add_env(\"LABELBOX_API_KEY\", API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# If you don\"t want to give google access to drive you can skip this cell\n", - "# and manually set `API_KEY` below.\n", - "\n", - "COLAB = \"google.colab\" in str(get_ipython())\n", - "if COLAB:\n", - " %pip install colab-env -qU\n", - " from colab_env 
import envvar_handler\n", - "\n", - " envvar_handler.envload()\n", - "\n", - "API_KEY = os.environ.get(\"LABELBOX_API_KEY\")\n", - "if not os.environ.get(\"LABELBOX_API_KEY\"):\n", - " API_KEY = getpass(\"Please enter your labelbox api key\")\n", - " if COLAB:\n", - " envvar_handler.add_env(\"LABELBOX_API_KEY\", API_KEY)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set this to a project that you want to use for the webhook\nPROJECT_ID = \"\"\n# Only update this if you have an on-prem deployment\nENDPOINT = \"https://api.labelbox.com/graphql\"", + "cell_type": "code", "outputs": [], - "source": [ - "# Set this to a project that you want to use for the webhook\n", - "PROJECT_ID = \"\"\n", - "# Only update this if you have an on-prem deployment\n", - "ENDPOINT = \"https://api.labelbox.com/graphql\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "client = lb.Client(api_key=API_KEY, endpoint=ENDPOINT)", + "cell_type": "code", "outputs": [], - "source": [ - "client = lb.Client(api_key=API_KEY, endpoint=ENDPOINT)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# We are using port 3001 for this example.\n# Feel free to set to whatever port you want\nWH_PORT = 3001", + "cell_type": "code", "outputs": [], - "source": [ - "# We are using port 3001 for this example.\n", - "# Feel free to set to whatever port you want\n", - "WH_PORT = 3001" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Configure NGROK (Optional)\n", @@ -141,211 +97,114 @@ "2. Download ngrok and extract the zip file\n", "3. Add ngrok to your path\n", "4. 
Add the authtoken `ngrok authtoken `" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "if not COLAB:\n os.system(f\"ngrok http {WH_PORT} &\")", + "cell_type": "code", "outputs": [], - "source": [ - "if not COLAB:\n", - " os.system(f\"ngrok http {WH_PORT} &\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Configure server to receive requests" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# This can be any secret that matches your webhook config (we will set later)\nsecret = b\"example_secret\"", + "cell_type": "code", "outputs": [], - "source": [ - "# This can be any secret that matches your webhook config (we will set later)\n", - "secret = b\"example_secret\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "app = Flask(__name__)\n\n\n@app.route(\"/\")\ndef hello_world():\n return \"Hello, World!\"\n\n\n@app.route(\"/webhook-endpoint\", methods=[\"POST\"])\ndef print_webhook_info():\n payload = request.data\n computed_signature = hmac.new(secret, msg=payload,\n digestmod=hashlib.sha1).hexdigest()\n if request.headers[\"X-Hub-Signature\"] != \"sha1=\" + computed_signature:\n print(\n \"Error: computed_signature does not match signature provided in the headers\"\n )\n return \"Error\", 500, 200\n\n print(\"=========== New Webhook Delivery ============\")\n print(\"Delivery ID: %s\" % request.headers[\"X-Labelbox-Id\"])\n print(\"Event: %s\" % request.headers[\"X-Labelbox-Event\"])\n print(\"Payload: %s\" %\n json.dumps(json.loads(payload.decode(\"utf8\")), indent=4))\n return \"Success\"\n\n\nthread = threading.Thread(target=lambda: run_simple(\"0.0.0.0\", WH_PORT, app))\nthread.start()", + "cell_type": "code", "outputs": [], - "source": [ - "app = Flask(__name__)\n", - "\n", - "\n", - "@app.route(\"/\")\n", - 
"def hello_world():\n", - " return \"Hello, World!\"\n", - "\n", - "\n", - "@app.route(\"/webhook-endpoint\", methods=[\"POST\"])\n", - "def print_webhook_info():\n", - " payload = request.data\n", - " computed_signature = hmac.new(secret, msg=payload,\n", - " digestmod=hashlib.sha1).hexdigest()\n", - " if request.headers[\"X-Hub-Signature\"] != \"sha1=\" + computed_signature:\n", - " print(\n", - " \"Error: computed_signature does not match signature provided in the headers\"\n", - " )\n", - " return \"Error\", 500, 200\n", - "\n", - " print(\"=========== New Webhook Delivery ============\")\n", - " print(\"Delivery ID: %s\" % request.headers[\"X-Labelbox-Id\"])\n", - " print(\"Event: %s\" % request.headers[\"X-Labelbox-Event\"])\n", - " print(\"Payload: %s\" %\n", - " json.dumps(json.loads(payload.decode(\"utf8\")), indent=4))\n", - " return \"Success\"\n", - "\n", - "\n", - "thread = threading.Thread(target=lambda: run_simple(\"0.0.0.0\", WH_PORT, app))\n", - "thread.start()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Test server" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "print(requests.get(\"http://localhost:3001\").text)", + "cell_type": "code", "outputs": [], - "source": [ - "print(requests.get(\"http://localhost:3001\").text)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Create Webhook" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "- Set ip address if your ip is publicly accessible.\n", "- Otherwise use the following to get ngrok public_url" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "if not COLAB:\n res = requests.get(\"http://localhost:4040/api/tunnels\")\n assert (res.status_code == 200\n ), f\"ngrok probably isn't running. 
{res.status_code}, {res.text}\"\n tunnels = res.json()[\"tunnels\"]\n tunnel = [\n t for t in tunnels if t[\"config\"][\"addr\"].split(\":\")[-1] == str(WH_PORT)\n ]\n tunnel = tunnel[0] # Should only be one..\n public_url = tunnel[\"public_url\"]\nelse:\n public_url = (\n f\"http://{socket.gethostbyname(socket.getfqdn(socket.gethostname()))}\")\nprint(public_url)", + "cell_type": "code", "outputs": [], - "source": [ - "if not COLAB:\n", - " res = requests.get(\"http://localhost:4040/api/tunnels\")\n", - " assert (res.status_code == 200\n", - " ), f\"ngrok probably isn't running. {res.status_code}, {res.text}\"\n", - " tunnels = res.json()[\"tunnels\"]\n", - " tunnel = [\n", - " t for t in tunnels if t[\"config\"][\"addr\"].split(\":\")[-1] == str(WH_PORT)\n", - " ]\n", - " tunnel = tunnel[0] # Should only be one..\n", - " public_url = tunnel[\"public_url\"]\n", - "else:\n", - " public_url = (\n", - " f\"http://{socket.gethostbyname(socket.getfqdn(socket.gethostname()))}\")\n", - "print(public_url)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set project to limit the scope to a single project\nproject = client.get_project(PROJECT_ID)\ntopics = {topic.value for topic in lb.Webhook.Topic}\n# For Global Webhooks (Global = per workspace) project = None\nwebhook = lb.Webhook.create(\n client,\n topics=topics,\n url=public_url,\n secret=secret.decode(),\n project=project,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# Set project to limit the scope to a single project\n", - "project = client.get_project(PROJECT_ID)\n", - "topics = {topic.value for topic in lb.Webhook.Topic}\n", - "# For Global Webhooks (Global = per workspace) project = None\n", - "webhook = lb.Webhook.create(\n", - " client,\n", - " topics=topics,\n", - " url=public_url,\n", - " secret=secret.decode(),\n", - " project=project,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": 
null, "metadata": {}, + "source": "# Ok so we should be configured assuming everything is setup correctly.\n# Go to the following url and make a new label to see if it works\nprint(f\"https://app.labelbox.com/projects/{PROJECT_ID}\")", + "cell_type": "code", "outputs": [], - "source": [ - "# Ok so we should be configured assuming everything is setup correctly.\n", - "# Go to the following url and make a new label to see if it works\n", - "print(f\"https://app.labelbox.com/projects/{PROJECT_ID}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Update Webhook" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# url, topics, and status can all be updated\nupdated_url = f\"{public_url}/webhook-endpoint\"\nprint(updated_url)\nwebhook.update(url=updated_url)\n# Go to the following url and try one last time.\n# Any supported action should work (create, delete, or update a label)\nprint(f\"https://app.labelbox.com/projects/{PROJECT_ID}\")", + "cell_type": "code", "outputs": [], - "source": [ - "# url, topics, and status can all be updated\n", - "updated_url = f\"{public_url}/webhook-endpoint\"\n", - "print(updated_url)\n", - "webhook.update(url=updated_url)\n", - "# Go to the following url and try one last time.\n", - "# Any supported action should work (create, delete, or update a label)\n", - "print(f\"https://app.labelbox.com/projects/{PROJECT_ID}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### List and delete all webhooks" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# DELETE:\nwebhook.update(status=lb.Webhook.Status.INACTIVE.value)\n\n# FETCH ALL WEBHOOKS:\norg = client.get_organization()\nwebhooks = org.webhooks()\n\n# Run this to clear all.\n# WARNING!!! 
THIS WILL DELETE ALL WEBHOOKS FOR YOUR ORG\n# ONLY RUN THIS IS YOU KNOW WHAT YOU ARE DOING.\n# for webhook in webhooks:\n# print(webhook)\n# webhook.update(status = lb.Webhook.Status.INACTIVE.value)", + "cell_type": "code", "outputs": [], - "source": [ - "# DELETE:\n", - "webhook.update(status=lb.Webhook.Status.INACTIVE.value)\n", - "\n", - "# FETCH ALL WEBHOOKS:\n", - "org = client.get_organization()\n", - "webhooks = org.webhooks()\n", - "\n", - "# Run this to clear all.\n", - "# WARNING!!! THIS WILL DELETE ALL WEBHOOKS FOR YOUR ORG\n", - "# ONLY RUN THIS IS YOU KNOW WHAT YOU ARE DOING.\n", - "# for webhook in webhooks:\n", - "# print(webhook)\n", - "# webhook.update(status = lb.Webhook.Status.INACTIVE.value)" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file