From 3aded85109ddeabc4a0551431700dce036844eb6 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:26:48 -0500 Subject: [PATCH 1/3] updated batches --- examples/basics/batches.ipynb | 797 +++++++++++++++++++++------------- 1 file changed, 499 insertions(+), 298 deletions(-) diff --git a/examples/basics/batches.ipynb b/examples/basics/batches.ipynb index 5b8b4080c..63d3d7ebd 100644 --- a/examples/basics/batches.ipynb +++ b/examples/basics/batches.ipynb @@ -1,299 +1,500 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, - "cells": [ - { - "metadata": {}, - "source": [ - "", - " ", - "\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "# Batches\n", - "https://docs.labelbox.com/docs/batches" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "* A batch is collection of data rows.\n", - "* A data row cannot be part of more than one batch in a given project.\n", - "* Batches work for all data types, but there can only be one data type per project.\n", - "* Batches can not be shared between projects.\n", - "* Batches may have data rows from multiple datasets.\n", - "* Currently, only benchmarks quality settings is supported in batch projects\n", - "* You can set the priority for each batch." - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "import labelbox as lb\nimport random\nimport uuid\nimport json", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "## API key and client\n", - "Provide a valid API key below in order to properly connect to the Labelbox Client." 
- ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Add your API key\nAPI_KEY = None\n# To get your API key go to: Workspace settings -> API -> Create API Key\nclient = lb.Client(api_key=API_KEY)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "## Create a dataset and data rows" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Create a dataset\ndataset = client.create_dataset(name=\"Demo-Batches-Colab\")\n\nuploads = []\n# Generate data rows\nfor i in range(1, 9):\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n \"TEST-ID-%id\" % uuid.uuid1(),\n })\n\ndata_rows = dataset.create_data_rows(uploads)\ndata_rows.wait_till_done()\nprint(\"ERRORS: \", data_rows.errors)\nprint(\"RESULT URL: \", data_rows.result_url)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "## Setup batch project" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "project = client.create_project(name=\"Demo-Batches-Project\",\n media_type=lb.MediaType.Image)\nprint(\"Project Name: \", project.name, \"Project ID: \", project.uid)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "## Create batches" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "### Select all data rows from the dataset\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "client.enable_experimental = True\n\nexport_task = dataset.export()\nexport_task.wait_till_done()\n\ndata_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n data_row = json.loads(output.json)\n data_rows.append(data_row)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "global_keys = [data_row[\"data_row\"][\"global_key\"] for data_row in data_rows]\nprint(\"Number of global keys:\", len(global_keys))", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Select a random sample\n", - "This method is useful if you have large datasets and only want to work with a handful of data rows" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "sample = random.sample(global_keys, 4)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Create a batch\n", - "This method takes in a list of either data row IDs or `DataRow` objects into a `data_rows` argument or global keys into a `global_keys` argument, but both approaches cannot be used in the same method." 
- ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "batch = project.create_batch(\n name=\"Demo-First-Batch\", # Each batch in a project must have a unique name\n global_keys=sample, # A list of data rows or data row ids\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n# number of data rows in the batch\nprint(\"Number of data rows in batch: \", batch.size)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Create multiple batches\n", - "The `project.create_batches()` method accepts up to 1 million data rows. Batches are chunked into groups of 100k if necessary, which is the maximum batch size. This method takes in a list of either data row IDs or `DataRow` objects into a `data_rows` argument or global keys into a `global_keys` argument, but both approaches cannot be used in the same method.\n", - "\n", - "This method takes in a list of either data row IDs or `DataRow` objects into a `data_rows` argument or global keys into a `global_keys` argument, but both approaches cannot be used in the same method. Batches will be created with the specified `name_prefix` argument and a unique suffix to ensure unique batch names. The suffix will be a 4-digit number starting at `0000`.\n", - "\n", - "For example, if the name prefix is `demo-create-batches-` and three batches are created, the names will be `demo-create-batches-0000`, `demo-create-batches-0001`, and `demo-create-batches-0002`. This method will throw an error if a batch with the same name already exists.\n", - "\n", - "In the code below, only one batch will be created, since we are only using the few data rows we created above. Creating over 100k data rows for this demonstration is not sensible, but this method is the preferred approach for batch creation as it will gracefully handle massive sets of data rows." - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# First, we must create a second project so that we can re-use the data rows we already created.\nsecond_project = client.create_project(name=\"Second-Demo-Batches-Project\",\n media_type=lb.MediaType.Image)\nprint(\"Project Name: \", second_project.name, \"Project ID: \", second_project.uid)\n\n# Then, use the method that will create multiple batches if necessary.\ntask = second_project.create_batches(name_prefix=\"demo-create-batches-\",\n global_keys=global_keys,\n priority=5)\n\nprint(\"Errors: \", task.errors())\nprint(\"Result: \", task.result())", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Create batches from a dataset\n", - "\n", - "If you wish to create batches in a project using all the data rows of a dataset, instead of having to gather global keys or ID and using subsets of data rows, you can use the `project.create_batches_from_dataset()` method. This method takes in a dataset ID and creates a batch (or batches if there are more than 100k data rows) comprised of all data rows not already in the project.\n", - "\n", - "The same logic applies to the `name_prefix` argument and the naming of batches as described in the section immediately above." 
- ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# First, we must create a third project so that we can re-use the data rows we already created.\nthird_project = client.create_project(name=\"Third-Demo-Batches-Project\",\n media_type=lb.MediaType.Image)\nprint(\"Project Name: \", third_project.name, \"Project ID: \", third_project.uid)\n\n# Then, use the method to create batches from a dataset.\ntask = third_project.create_batches_from_dataset(\n name_prefix=\"demo-batches-from-dataset-\",\n dataset_id=dataset.uid,\n priority=5)\n\nprint(\"Errors: \", task.errors())\nprint(\"Result: \", task.result())", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "## Manage Batches\n", - "Note: You can view your batch data through the **Data Rows** tab." - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "### Export Batches" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "Batches will need to be exported from your project as a export parameter. Before you can export from a project you will need an ontology attached." - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "#### Create and Attach Ontology to Project" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "classification_features = [\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"Quality Issues\",\n options=[\n lb.Option(value=\"blurry\", label=\"Blurry\"),\n lb.Option(value=\"distorted\", label=\"Distorted\"),\n ],\n )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n classifications=classification_features)\n\nontology = client.create_ontology(\n \"Ontology from new features\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)\n\nproject.setup_editor(ontology)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "#### Export from Project" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "client.enable_experimental = True\n\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"performance_details\": True,\n \"batch_ids\": [\n batch.uid\n ], # Include batch ids if you only want to export specific batches, otherwise,\n # you can export all the data without using this parameter\n}\nfilters = {}\n\n# A task is returned, this provides additional information about the status of your task, such as\n# any errors encountered\nexport_task = project.export(params=export_params, filters=filters)\nexport_task.wait_till_done()", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "data_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n data_row = json.loads(output.json)\n data_rows.append(data_row)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "## Export the data row iDs\ndata_rows = [dr for dr in data_rows]\nprint(\"Data rows in batch: \", data_rows)\n\n## List the batches in your project\nfor batch in project.batches():\n 
print(\"Batch name: \", batch.name, \" Batch ID:\", batch.uid)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Archive a batch" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Archiving a batch removes all queued data rows in the batch from the project\nbatch.remove_queued_data_rows()", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "## Clean up \n", - "Uncomment and run the cell below to optionally delete the batch, dataset, and/or project created in this demo." - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Delete Batch\n# batch.delete()\n\n# Delete Project\n# project.delete()\n\n# Delete DataSet\n# dataset.delete()", - "cell_type": "code", - "outputs": [], - "execution_count": null - } - ] -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Batches\n", + "https://docs.labelbox.com/docs/batches" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* A batch is collection of data rows.\n", + "* A data row cannot be part of more than one batch in a given project.\n", + "* Batches work for all data types, but there can only be one data type per project.\n", + "* Batches can not be shared between projects.\n", + "* Batches may have data rows from multiple datasets.\n", + "* Currently, only benchmarks quality settings is supported in batch projects\n", + "* You can set the priority for each batch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -q --upgrade \"labelbox[data]\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import labelbox as lb\n", + "import random\n", + "import uuid\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API key and client\n", + "Provide a valid API key below to connect to the Labelbox client properly. For more information, please review the [Create API key](https://docs.labelbox.com/reference/create-api-key) guide." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "API_KEY = None\n", + "client = lb.Client(api_key=API_KEY)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a dataset and data rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62639b4f", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a dataset\n", + "dataset = client.create_dataset(name=\"Demo-Batches-Colab\")\n", + "\n", + "uploads = []\n", + "# Generate data rows\n", + "for i in range(1, 9):\n", + " uploads.append(\n", + " {\n", + " \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", + " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1(),\n", + " }\n", + " )\n", + "\n", + "data_rows = dataset.create_data_rows(uploads)\n", + "data_rows.wait_till_done()\n", + "print(\"ERRORS: \", data_rows.errors)\n", + "print(\"RESULT URL: \", data_rows.result_url)" + ] + }, + { + "cell_type": "markdown", + "id": "d2384377", + "metadata": {}, + "source": [ + "## Setup batch project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddabec74", + "metadata": {}, + "outputs": [], + "source": [ + "project = client.create_project(\n", + " name=\"Demo-Batches-Project\", media_type=lb.MediaType.Image\n", + ")\n", + "print(\"Project Name: \", project.name, \"Project ID: \", project.uid)" + ] + }, + { + "cell_type": "markdown", + "id": "6a242cc8", + "metadata": {}, + "source": [ + "## Create batches" + ] + }, + { + "cell_type": "markdown", + "id": "bf21de1c", + "metadata": {}, + "source": [ + "### Select all data rows from the dataset\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23edfe21", + "metadata": {}, + "outputs": [], + "source": [ + "export_task = dataset.export()\n", + "export_task.wait_till_done()\n", + "\n", + "data_rows = []\n", + "\n", + "\n", + "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", + " data_row = output.json\n", + " data_rows.append(data_row)\n", + "\n", + "\n", + "if export_task.has_errors():\n", + " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", + " stream_handler=lambda error: print(error)\n", + " )\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_buffered_stream(\n", + " stream_type=lb.StreamType.RESULT\n", + " ).start(stream_handler=json_stream_handler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f90a541c", + "metadata": {}, + "outputs": [], + "source": [ + "global_keys = [data_row[\"data_row\"][\"global_key\"] for data_row in data_rows]\n", + "print(\"Number of global keys:\", len(global_keys))" + ] + }, + { + "cell_type": "markdown", + "id": "1676b642", + "metadata": {}, + "source": [ + "### Select a random sample\n", + "This method is useful if you have large datasets and only want to work with a handful of data rows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "591c131a", + "metadata": {}, + "outputs": [], + "source": [ + "sample = random.sample(global_keys, 4)" + ] + }, + { + "cell_type": "markdown", + "id": "be5d70cb", + "metadata": {}, + "source": [ + "### Create a batch\n", + "This method takes in a list of either data row IDs or `DataRow` objects into a `data_rows` argument or global keys into a `global_keys` argument, but both approaches cannot be used in the same method." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e1683234",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "batch = project.create_batch(\n",
+ "    name=\"Demo-First-Batch\",  # Each batch in a project must have a unique name\n",
+ "    global_keys=sample,  # A list of global keys (or use data_rows for data row IDs)\n",
+ "    priority=5,  # priority between 1 (highest) and 5 (lowest)\n",
+ ")\n",
+ "# number of data rows in the batch\n",
+ "print(\"Number of data rows in batch: \", batch.size)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0937b16c",
+ "metadata": {},
+ "source": [
+ "### Create multiple batches\n",
+ "The `project.create_batches()` method accepts up to 1 million data rows. Batches are chunked into groups of 100k data rows (the maximum batch size) if necessary. This method takes a list of either data row IDs or `DataRow` objects into a `data_rows` argument, or a list of global keys into a `global_keys` argument; the two approaches cannot be mixed in the same call.\n",
+ "\n",
+ "Batches will be created with the specified `name_prefix` argument and a unique suffix to ensure unique batch names. The suffix will be a 4-digit number starting at `0000`.\n",
+ "\n",
+ "For example, if the name prefix is `demo-create-batches-` and three batches are created, the names will be `demo-create-batches-0000`, `demo-create-batches-0001`, and `demo-create-batches-0002`. This method will throw an error if a batch with the same name already exists.\n",
+ "\n",
+ "In the code below, only one batch will be created, since we are only using the few data rows we created above. Creating over 100k data rows for this demonstration would not be sensible, but this method is the preferred approach to batch creation because it gracefully handles massive sets of data rows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e07f8bc8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# First, we must create a second project so that we can re-use the data rows we already created.\n",
+ "second_project = client.create_project(\n",
+ "    name=\"Second-Demo-Batches-Project\", media_type=lb.MediaType.Image\n",
+ ")\n",
+ "print(\"Project Name: \", second_project.name, \"Project ID: \", second_project.uid)\n",
+ "\n",
+ "# Then, use the method that will create multiple batches if necessary.\n",
+ "task = second_project.create_batches(\n",
+ "    name_prefix=\"demo-create-batches-\", global_keys=global_keys, priority=5\n",
+ ")\n",
+ "\n",
+ "print(\"Errors: \", task.errors())\n",
+ "print(\"Result: \", task.result())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8b094283",
+ "metadata": {},
+ "source": [
+ "### Create batches from a dataset\n",
+ "\n",
+ "If you wish to create batches in a project using all the data rows of a dataset, instead of having to gather global keys or IDs and work with subsets of data rows, you can use the `project.create_batches_from_dataset()` method. This method takes in a dataset ID and creates a batch (or batches, if there are more than 100k data rows) composed of all data rows not already in the project.\n",
+ "\n",
+ "The same logic applies to the `name_prefix` argument and the naming of batches as described in the section immediately above.\n",
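+ "\n",
+ "As a rough sketch (referring to the `third_project` and `task` objects created in the next cell, and using only calls that appear elsewhere in this notebook), you can wait on the task and then list the project's batches to confirm what was created:\n",
+ "\n",
+ "```python\n",
+ "# Hypothetical follow-up to the next cell: list the batches now present in the project.\n",
+ "task.wait_till_done()  # block until batch creation finishes\n",
+ "for batch in third_project.batches():\n",
+ "    print(\"Batch name: \", batch.name, \" Batch ID:\", batch.uid)\n",
+ "```"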
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "597ad3c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# First, we must create a third project so that we can re-use the data rows we already created.\n",
+ "third_project = client.create_project(\n",
+ "    name=\"Third-Demo-Batches-Project\", media_type=lb.MediaType.Image\n",
+ ")\n",
+ "print(\"Project Name: \", third_project.name, \"Project ID: \", third_project.uid)\n",
+ "\n",
+ "# Then, use the method to create batches from a dataset.\n",
+ "task = third_project.create_batches_from_dataset(\n",
+ "    name_prefix=\"demo-batches-from-dataset-\", dataset_id=dataset.uid, priority=5\n",
+ ")\n",
+ "\n",
+ "print(\"Errors: \", task.errors())\n",
+ "print(\"Result: \", task.result())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bf36d1f4",
+ "metadata": {},
+ "source": [
+ "## Manage Batches\n",
+ "Note: You can view your batch data through the **Data Rows** tab."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "920f0336",
+ "metadata": {},
+ "source": [
+ "### Export Batches"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "646f4bd7",
+ "metadata": {},
+ "source": [
+ "Batches are exported from your project by passing their batch IDs as an export parameter. Before you can export from a project, you will need an ontology attached."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9292f4a2",
+ "metadata": {},
+ "source": [
+ "#### Create and Attach Ontology to Project"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b1a11b9b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "classification_features = [\n",
+ "    lb.Classification(\n",
+ "        class_type=lb.Classification.Type.CHECKLIST,\n",
+ "        name=\"Quality Issues\",\n",
+ "        options=[\n",
+ "            lb.Option(value=\"blurry\", label=\"Blurry\"),\n",
+ "            lb.Option(value=\"distorted\", label=\"Distorted\"),\n",
+ "        ],\n",
+ "    )\n",
+ "]\n",
+ "\n",
+ "ontology_builder = lb.OntologyBuilder(\n",
+ "    tools=[], classifications=classification_features\n",
+ ")\n",
+ "\n",
+ "ontology = client.create_ontology(\n",
+ "    \"Ontology from new features\",\n",
+ "    ontology_builder.asdict(),\n",
+ "    media_type=lb.MediaType.Image,\n",
+ ")\n",
+ "\n",
+ "project.setup_editor(ontology)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c8e6723d",
+ "metadata": {},
+ "source": [
+ "#### Export from Project"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "506f8c9c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "export_params = {\n",
+ "    \"attachments\": True,\n",
+ "    \"metadata_fields\": True,\n",
+ "    \"data_row_details\": True,\n",
+ "    \"project_details\": True,\n",
+ "    \"performance_details\": True,\n",
+ "    \"batch_ids\": [\n",
+ "        batch.uid\n",
+ "    ],  # Include batch ids if you only want to export specific batches;\n",
+ "    # otherwise, you can export all the data without using this parameter\n",
+ "}\n",
+ "filters = {}\n",
+ "\n",
+ "# A task is returned; this provides additional information about the status of your task,\n",
+ "# such as any errors encountered\n",
+ "export_task = project.export(params=export_params, filters=filters)\n",
+ "export_task.wait_till_done()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a3ba9797",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_rows = []\n",
+ "\n",
+ "\n",
+ "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n",
+ "    data_row = output.json\n",
+ "    data_rows.append(data_row)\n",
+ "\n",
+ "\n",
+ "if export_task.has_errors():\n",
+ "    
export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", + " stream_handler=lambda error: print(error)\n", + " )\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_buffered_stream(\n", + " stream_type=lb.StreamType.RESULT\n", + " ).start(stream_handler=json_stream_handler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Export the data row iDs\n", + "data_rows = [dr for dr in data_rows]\n", + "print(\"Data rows in batch: \", data_rows)\n", + "\n", + "## List the batches in your project\n", + "for batch in project.batches():\n", + " print(\"Batch name: \", batch.name, \" Batch ID:\", batch.uid)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Archive a batch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Archiving a batch removes all queued data rows in the batch from the project\n", + "batch.remove_queued_data_rows()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up \n", + "Uncomment and run the cell below to optionally delete the batch, dataset, and/or project created in this demo." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete Batch\n", + "# batch.delete()\n", + "\n", + "# Delete Project\n", + "# project.delete()\n", + "\n", + "# Delete DataSet\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From f6769d79d2c50a323f480fed24d5f1b4ad270a6e Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:16:40 -0500 Subject: [PATCH 2/3] modified task export to remove json.loads --- examples/annotation_import/pdf.ipynb | 1030 ++++++++-- examples/basics/batches.ipynb | 22 +- examples/basics/custom_embeddings.ipynb | 301 ++- examples/basics/data_rows.ipynb | 891 +++++--- examples/basics/projects.ipynb | 480 +++-- examples/exports/composite_mask_export.ipynb | 269 ++- .../custom_metrics_demo.ipynb | 1823 +++++++++++++++-- examples/model_experiments/model_slices.ipynb | 285 ++- .../prediction_upload/pdf_predictions.ipynb | 1046 ++++++++-- .../queue_management.ipynb | 585 ++++-- 10 files changed, 5418 insertions(+), 1314 deletions(-) diff --git a/examples/annotation_import/pdf.ipynb b/examples/annotation_import/pdf.ipynb index 8e037f6a2..526620dec 100644 --- a/examples/annotation_import/pdf.ipynb +++ b/examples/annotation_import/pdf.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 1, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,17 +22,17 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Annotation Import" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -56,137 +54,538 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": 
null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nimport json\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "import json\n", + "import requests\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key\n", "Guides on https://docs.labelbox.com/docs/create-an-api-key" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your api key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Supported Annotations" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########## Entity ##########\n\n# Annotation Types\nentities_annotations = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_annotations_ndjson = {\n \"name\":\n \"named_entity\",\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n \"groupId\": \"\",\n \"page\": 1,\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Entity ##########\n", + "\n", + "# Annotation Types\n", + "entities_annotations = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "# NDJSON\n", + "entities_annotations_ndjson = {\n", + " \"name\":\n", + " \"named_entity\",\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\",],\n", + " \"groupId\": \"\",\n", + " \"page\": 1,\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########### Radio Classification #########\n\n# Annotation types\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########### Radio Classification #########\n", + "\n", + "# Annotation types\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + ")\n", + "# NDJSON\n", + "radio_annotation_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\"\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_annotation = lb_types.ClassificationAnnotation(\n 
name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Checklist Classification ###########\n", + "\n", + "# Annotation types\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"checklist_question\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\"\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\"\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Bounding Box ###########\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n end=lb_types.Point(x=518.571,\n y=245.143), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_annotation_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 135.3,\n \"left\": 102.771,\n \"height\": 109.843,\n \"width\": 415.8\n },\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Bounding Box ###########\n", + "\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\", # must match your ontology feature\"s name\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n", + " end=lb_types.Point(x=518.571,\n", + " y=245.143), # x= left + width , y = top + height\n", + " page=0,\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " ),\n", + ")\n", + "\n", + "bbox_annotation_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"bbox\": {\n", + " \"top\": 135.3,\n", + " \"left\": 102.771,\n", + " \"height\": 109.843,\n", + " \"width\": 415.8\n", + " },\n", + " \"page\": 0,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "# ############ global nested classifications ###########\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n 
}],\n}\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# ############ global nested classifications ###########\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_checklist_annotation_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " },\n", + " }],\n", + " }],\n", + "}\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_annotation_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############## Classification Free-form text ##############\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\"),\n)\n\ntext_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############## Classification Free-form text ##############\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", # must match your ontology feature\"s name\n", + " value=lb_types.Text(answer=\"sample text\"),\n", + ")\n", + "\n", + "text_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}" + ] }, { - "metadata": {}, - "source": "######### BBOX with nested classifications 
#########\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n end=lb_types.Point(x=566.657,\n y=420.986), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_annotation_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\"\n },\n }],\n },\n }],\n \"bbox\": {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### BBOX with nested classifications #########\n", + "\n", + "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n", + " end=lb_types.Point(x=566.657,\n", + " y=420.986), # x= left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"second_sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"second_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "bbox_with_radio_subclass_annotation_ndjson = {\n", + " \"name\": \"bbox_with_radio_subclass\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_radio_answer\",\n", + " \"classifications\": [{\n", + " \"name\": \"second_sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"second_sub_radio_answer\"\n", + " },\n", + " }],\n", + " },\n", + " }],\n", + " \"bbox\": {\n", + " \"top\": 226.757,\n", + " \"left\": 317.271,\n", + " \"height\": 194.229,\n", + " \"width\": 249.386,\n", + " },\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "############ NER with nested classifications ########\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n text_selections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n 
lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation_ndjson = {\n \"name\":\n \"ner_with_checklist_subclass\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": [{\n \"name\": \"first_sub_checklist_answer\"\n }],\n }],\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ NER with nested classifications ########\n", + "\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"ner_with_checklist_subclass\",\n", + " text_selections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "ner_with_checklist_subclass_annotation_ndjson = {\n", + " \"name\":\n", + " \"ner_with_checklist_subclass\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\": \"first_sub_checklist_answer\"\n", + " }],\n", + " }],\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\"],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "######### Relationships ##########\nentity_source = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_target = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=entity_source,\n target=entity_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source = str(uuid.uuid4())\nuuid_target = str(uuid.uuid4())\n\nentity_source_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_source,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\n\nentity_target_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_target,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\nner_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source,\n \"target\": uuid_target,\n \"type\": \"unidirectional\",\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### Relationships ##########\n", + "entity_source = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "entity_target = lb_types.ObjectAnnotation(\n", + " 
name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "entity_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=entity_source,\n", + " target=entity_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ),\n", + ")\n", + "\n", + "## Only supported for MAL imports\n", + "uuid_source = str(uuid.uuid4())\n", + "uuid_target = str(uuid.uuid4())\n", + "\n", + "entity_source_ndjson = {\n", + " \"name\":\n", + " \"named_entity\",\n", + " \"uuid\":\n", + " uuid_source,\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\"],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }],\n", + "}\n", + "\n", + "entity_target_ndjson = {\n", + " \"name\":\n", + " \"named_entity\",\n", + " \"uuid\":\n", + " uuid_target,\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\"],\n", + " \"groupId\": \"\",\n", + " \"page\": 1\n", + " }],\n", + "}\n", + "ner_relationship_annotation_ndjson = {\n", + " \"name\": \"relationship\",\n", + " \"relationship\": {\n", + " \"source\": uuid_source,\n", + " \"target\": uuid_target,\n", + " \"type\": \"unidirectional\",\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "######### BBOX with relationships #############\n# Python Annotation\nbbox_source = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n end=lb_types.Point(x=270.907,\n y=149.556), # x = left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_target = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=96.424, y=66.251),\n end=lb_types.Point(x=179.074, y=146.932),\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=bbox_source,\n target=bbox_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source_2 = str(uuid.uuid4())\nuuid_target_2 = str(uuid.uuid4())\n\nbbox_source_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": uuid_source_2,\n \"bbox\": {\n \"top\": 68.875,\n \"left\": 188.257,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_target_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": uuid_target_2,\n \"bbox\": {\n \"top\": 66.251,\n \"left\": 96.424,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source_2,\n \"target\": uuid_target_2,\n \"type\": \"unidirectional\",\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### BBOX with relationships #############\n", + "# Python Annotation\n", + "bbox_source = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n", + " end=lb_types.Point(x=270.907,\n", + " y=149.556), # x = left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " 
),\n", + ")\n", + "\n", + "bbox_target = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=96.424, y=66.251),\n", + " end=lb_types.Point(x=179.074, y=146.932),\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + ")\n", + "\n", + "bbox_relationship = lb_types.RelationshipAnnotation(\n", + " name=\"relationship\",\n", + " value=lb_types.Relationship(\n", + " source=bbox_source,\n", + " target=bbox_target,\n", + " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ),\n", + ")\n", + "\n", + "## Only supported for MAL imports\n", + "uuid_source_2 = str(uuid.uuid4())\n", + "uuid_target_2 = str(uuid.uuid4())\n", + "\n", + "bbox_source_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"uuid\": uuid_source_2,\n", + " \"bbox\": {\n", + " \"top\": 68.875,\n", + " \"left\": 188.257,\n", + " \"height\": 80.681,\n", + " \"width\": 82.65\n", + " },\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}\n", + "\n", + "bbox_target_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"uuid\": uuid_target_2,\n", + " \"bbox\": {\n", + " \"top\": 66.251,\n", + " \"left\": 96.424,\n", + " \"height\": 80.681,\n", + " \"width\": 82.65\n", + " },\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}\n", + "\n", + "bbox_relationship_annotation_ndjson = {\n", + " \"name\": \"relationship\",\n", + " \"relationship\": {\n", + " \"source\": uuid_source_2,\n", + " \"target\": uuid_target_2,\n", + " \"type\": \"unidirectional\",\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Passing a `text_layer_url` is not longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", @@ -200,60 +599,206 @@ "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. 
As a result, all tokens in the text layer are also rotated by 90 degrees.\n", "\n", "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "global_key = \"0801.3483_doc.pdf\" + str(uuid.uuid4())\nimg_url = {\n \"row_data\": {\n \"pdf_url\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n },\n \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "global_key = \"0801.3483_doc.pdf\" + str(uuid.uuid4())\n", + "img_url = {\n", + " \"row_data\": {\n", + " \"pdf_url\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", + " },\n", + " \"global_key\": global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", + "task = dataset.create_data_rows([img_url])\n", + "task.wait_till_done()\n", + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: {task.errors}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if (\"Duplicate global key\" in error[\"message\"] and\n", + " dataset.row_count == 0):\n", + " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: {dataset}\")\n", + " dataset.delete()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an Ontology for your project\n", "\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n 
lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "## Setup the ontology and link the tools created above.\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " tools=[ # List of Tool objects\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", + " 
lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.NER,\n", + " name=\"ner_with_checklist_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_sub_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"second_sub_radio_question\",\n", + " options=[\n", + " lb.Option(\"second_sub_radio_answer\")\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Document Annotation Import Demo\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Document,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Creating a labeling project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"PDF_annotation_demo\",\n media_type=lb.MediaType.Document)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Labelbox project\n", + "project = client.create_project(name=\"PDF_annotation_demo\",\n", + " media_type=lb.MediaType.Document)\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5. 
Create the annotation payload\n",
@@ -262,124 +807,357 @@
"Labelbox supports NDJSON only for this data type.\n",
"\n",
"The resulting label should have exactly the same content for annotations that are supported by both (with the exception of the UUID strings that are generated)"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"##### Step 5.1: First, we need to populate the text selections for Entity annotations\n",
"To import NER annotations, you must pass a `text_layer_url`. Labelbox automatically generates a `text_layer_url` after importing a PDF asset that doesn't include one.\n"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"To extract the generated text layer URL, we first need to export the data row."
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "client.enable_experimental = True\ntask = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_buffered_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = json.loads(output.json)\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "client.enable_experimental = True\n",
+ "task = lb.DataRow.export(client=client, global_keys=[global_key])\n",
+ "task.wait_till_done()\n",
+ "stream = task.get_buffered_stream()\n",
+ "\n",
+ "text_layer = \"\"\n",
+ "for output in stream:\n",
+ " output_json = output.json\n",
+ " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n",
+ "print(text_layer)"
+ ]
},
{
- "metadata": {},
- "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n \"tokenIds\": list_tokens,\n \"page\": page\n }]\n })\n\n\n# Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n \"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. Kobayashi\",\n \"Organic charge transfer salts based on the donor\",\n \"the experimental investigations on this issue have not\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\ntext_selections_source = []\ntext_selections_target = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_annotations_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_annotation_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[2]:\n relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n text_selection_entity_source = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n text_selections_source.append(text_selection_entity_source)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entity_source_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_source, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[3]:\n relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n text_selection_entity_target = lb_types.DocumentTextSelection(\n group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n text_selections_target.append(text_selection_entity_target)\n # build text selections forthe NDJson annotations\n update_text_selections(\n annotation=entity_target_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_target, # ids representing individual words from the group\n page=1,\n )",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# Helper method\n",
+ "def update_text_selections(annotation, group_id, list_tokens, page):\n",
+ " return annotation.update({\n",
+ " \"textSelections\": [{\n",
+ " \"groupId\": group_id,\n",
+ " \"tokenIds\": list_tokens,\n",
+ " \"page\": page\n",
+ " }]\n",
+ " })\n",
+ "\n",
+ "\n",
+ "# Fetch the content of the text layer\n",
+ "res = requests.get(text_layer)\n",
+ "\n",
+ "# Phrases that we want to annotate, obtained from the text layer URL\n",
+ "content_phrases = [\n",
+ " \"Metal-insulator (MI) transitions have been one of the\",\n",
+ " \"T. Sasaki, N. Yoneyama, and N. Kobayashi\",\n",
+ " \"Organic charge transfer salts based on the donor\",\n",
+ " \"the experimental investigations on this issue have not\",\n",
+ "]\n",
+ "\n",
+ "# Parse the text layer\n",
+ "text_selections = []\n",
+ "text_selections_ner = []\n",
+ "text_selections_source = []\n",
+ "text_selections_target = []\n",
+ "\n",
+ "for obj in json.loads(res.text):\n",
+ " for group in obj[\"groups\"]:\n",
+ " if group[\"content\"] == content_phrases[0]:\n",
+ " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n",
+ " # build text selections for Python Annotation Types\n",
+ " document_text_selection = lb_types.DocumentTextSelection(\n",
+ " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n",
+ " text_selections.append(document_text_selection)\n",
+ " # build text selection for the NDJson annotations\n",
+ " update_text_selections(\n",
+ " annotation=entities_annotations_ndjson,\n",
+ " group_id=group[\"id\"], # id representing group of words\n",
+ " list_tokens=\n",
+ " list_tokens, # ids representing individual words from the group\n",
+ " page=1,\n",
+ " )\n",
+ " if group[\"content\"] == content_phrases[1]:\n",
+ " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n",
+ " # build text selections for Python Annotation Types\n",
+ " ner_text_selection = lb_types.DocumentTextSelection(\n",
+ " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n",
+ " text_selections_ner.append(ner_text_selection)\n",
+ " # build text selection for the NDJson annotations\n",
+ " update_text_selections(\n",
+ " annotation=ner_with_checklist_subclass_annotation_ndjson,\n",
+ " group_id=group[\"id\"], # id representing group of words\n",
+ " list_tokens=\n",
+ " list_tokens_2, # ids representing individual words from the group\n",
+ " page=1,\n",
+ " )\n",
+ " if group[\"content\"] == content_phrases[2]:\n",
+ " relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n",
+ " # build text selections for Python Annotation Types\n",
+ " text_selection_entity_source = lb_types.DocumentTextSelection(\n",
+ " groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n",
+ " text_selections_source.append(text_selection_entity_source)\n",
+ " # build text selection for the NDJson annotations\n",
+ " update_text_selections(\n",
+ " annotation=entity_source_ndjson,\n",
+ " group_id=group[\"id\"], # id representing group of words\n",
+ " list_tokens=\n",
+ " relationship_source, # ids representing individual words from the group\n",
+ " page=1,\n",
+ " )\n",
+ " if group[\"content\"] == content_phrases[3]:\n",
+ " relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n",
+ " # build text selections for Python Annotation Types\n",
+ " text_selection_entity_target = lb_types.DocumentTextSelection(\n",
+ " groupId=group[\"id\"], tokenIds=relationship_target, page=1)\n",
+ " text_selections_target.append(text_selection_entity_target)\n",
+ " # build text selections for the NDJson annotations\n",
+ " update_text_selections(\n",
+ " annotation=entity_target_ndjson,\n",
+ " group_id=group[\"id\"], # id representing group of words\n",
+ " list_tokens=\n",
+ " relationship_target, # ids representing individual words from the group\n",
+ " page=1,\n",
+ " )"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"Re-write the Python annotations to include text selections (only required for Python annotation types)"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# re-write the entity annotation with text selections\nentities_annotation_document_entity = lb_types.DocumentEntity(\n name=\"named_entity\", textSelections=text_selections)\nentities_annotation = lb_types.ObjectAnnotation(\n name=\"named_entity\", value=entities_annotation_document_entity)\n\n# re-write the entity annotation + subclassification with text selections\nclassifications = [\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=ner_annotation_with_subclass,\n classifications=classifications,\n)\n\n# re-write the entity source and target annotations withe text selectios\nentity_source_doc = lb_types.DocumentEntity(\n name=\"named_entity\", text_selections=text_selections_source)\nentity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n value=entity_source_doc)\n\nentity_target_doc = lb_types.DocumentEntity(\n name=\"named_entity\", text_selections=text_selections_target)\nentity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n value=entity_target_doc)\n\n# re-write the entity relationship with the re-created entities\nentity_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=entity_source,\n target=entity_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# re-write the entity annotation with text selections\n",
+ "entities_annotation_document_entity = lb_types.DocumentEntity(\n",
+ " name=\"named_entity\", textSelections=text_selections)\n",
+ "entities_annotation = lb_types.ObjectAnnotation(\n",
+ " name=\"named_entity\", value=entities_annotation_document_entity)\n",
+ "\n",
+ "# re-write the entity annotation + subclassification with text selections\n",
+ "classifications = [\n",
+ " lb_types.ClassificationAnnotation(\n",
+ " name=\"sub_checklist_question\",\n",
+ " value=lb_types.Checklist(answer=[\n",
+ " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n",
+ " ]),\n",
+ " )\n",
+ "]\n",
+ "ner_annotation_with_subclass = lb_types.DocumentEntity(\n",
+ " name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\n",
+ "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n",
+ " name=\"ner_with_checklist_subclass\",\n",
+ " value=ner_annotation_with_subclass,\n",
+ " classifications=classifications,\n",
+ ")\n",
+ "\n",
+ "# re-write the entity source and target annotations with the text selections\n",
+ "entity_source_doc = lb_types.DocumentEntity(\n",
+ " name=\"named_entity\", textSelections=text_selections_source)\n",
+ "entity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n",
+ " value=entity_source_doc)\n",
+ "\n",
+ "entity_target_doc = lb_types.DocumentEntity(\n",
+ " name=\"named_entity\", textSelections=text_selections_target)\n",
+ "entity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n",
+ " value=entity_target_doc)\n",
+ "\n",
+ "# re-write the entity relationship with the re-created entities\n",
+ "entity_relationship = lb_types.RelationshipAnnotation(\n",
+ " name=\"relationship\",\n",
+ " value=lb_types.Relationship(\n",
+ " source=entity_source,\n",
+ " target=entity_target,\n",
" type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", + " ),\n", + ")" + ] }, { - "metadata": {}, - "source": "# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\nprint(f\"entities_annotation={entities_annotation}\")\nprint(\n f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\nprint(f\"entity_source_ndjson={entity_source_ndjson}\")\nprint(f\"entity_target_ndjson={entity_target_ndjson}\")\nprint(f\"entity_source={entity_source}\")\nprint(f\"entity_target={entity_target}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Final NDJSON and python annotations\n", + "print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n", + "print(f\"entities_annotation={entities_annotation}\")\n", + "print(\n", + " f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n", + ")\n", + "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\n", + "print(f\"entity_source_ndjson={entity_source_ndjson}\")\n", + "print(f\"entity_target_ndjson={entity_target_ndjson}\")\n", + "print(f\"entity_source={entity_source}\")\n", + "print(f\"entity_target={entity_target}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. Note that only a handful of python annotation types are supported for PDF documents." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "labels = []\n\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n radio_annotation,\n nested_radio_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n ner_with_checklist_subclass_annotation,\n entity_source,\n entity_target,\n entity_relationship, # Only supported for MAL imports\n bbox_source,\n bbox_target,\n bbox_relationship, # Only supported for MAL imports\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "labels = []\n", + "\n", + "labels.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " entities_annotation,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " text_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " bbox_annotation,\n", + " bbox_with_radio_subclass_annotation,\n", + " ner_with_checklist_subclass_annotation,\n", + " entity_source,\n", + " entity_target,\n", + " entity_relationship, # Only supported for MAL imports\n", + " bbox_source,\n", + " bbox_target,\n", + " bbox_relationship, # Only supported for MAL imports\n", + " ],\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### NDJson annotations\n", "Here we create the complete labels ndjson payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created above." 
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "label_ndjson = []\nfor annot in [\n entities_annotations_ndjson,\n checklist_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n text_annotation_ndjson,\n radio_annotation_ndjson,\n nested_radio_annotation_ndjson,\n bbox_annotation_ndjson,\n bbox_with_radio_subclass_annotation_ndjson,\n ner_with_checklist_subclass_annotation_ndjson,\n entity_source_ndjson,\n entity_target_ndjson,\n ner_relationship_annotation_ndjson, # Only supported for MAL imports\n bbox_source_ndjson,\n bbox_target_ndjson,\n bbox_relationship_annotation_ndjson, # Only supported for MAL imports\n]:\n annot.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_ndjson.append(annot)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "label_ndjson = []\n",
+ "for annot in [\n",
+ " entities_annotations_ndjson,\n",
+ " checklist_annotation_ndjson,\n",
+ " nested_checklist_annotation_ndjson,\n",
+ " text_annotation_ndjson,\n",
+ " radio_annotation_ndjson,\n",
+ " nested_radio_annotation_ndjson,\n",
+ " bbox_annotation_ndjson,\n",
+ " bbox_with_radio_subclass_annotation_ndjson,\n",
+ " ner_with_checklist_subclass_annotation_ndjson,\n",
+ " entity_source_ndjson,\n",
+ " entity_target_ndjson,\n",
+ " ner_relationship_annotation_ndjson, # Only supported for MAL imports\n",
+ " bbox_source_ndjson,\n",
+ " bbox_target_ndjson,\n",
+ " bbox_relationship_annotation_ndjson, # Only supported for MAL imports\n",
+ "]:\n",
+ " annot.update({\n",
+ " \"dataRow\": {\n",
+ " \"globalKey\": global_key\n",
+ " },\n",
+ " })\n",
+ " label_ndjson.append(annot)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Step 6: Import the annotation payload\n",
"For the purposes of this tutorial, import only one of the annotation payloads at a time (either NDJSON or Python annotation types)."
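Because only one payload format should go into a given import job, a small toggle can make the choice explicit. The following is a minimal hedged sketch: it assumes the `labels` and `label_ndjson` lists built in the cells above, and relies on the fact, used elsewhere in this notebook, that `MALPredictionImport.create_from_objects` accepts either format as `predictions`.

```python
# A minimal sketch: import exactly ONE of the two payloads built above.
USE_NDJSON = False  # flip to True to import the NDJSON payload instead

predictions = label_ndjson if USE_NDJSON else labels

upload_job = lb.MALPredictionImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name="pdf_annotation_upload" + str(uuid.uuid4()),
    predictions=predictions,
)
upload_job.wait_until_done()
print("Errors:", upload_job.errors)
```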
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"Option A: Upload to a labeling project as pre-labels (MAL)"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "upload_job = lb.MALPredictionImport.create_from_objects(\n",
+ " client=client,\n",
+ " project_id=project.uid,\n",
+ " name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n",
+ " predictions=labels,\n",
+ ")\n",
+ "\n",
+ "upload_job.wait_until_done()\n",
+ "# Errors will appear for annotation uploads that failed.\n",
+ "print(\"Errors:\", upload_job.errors)\n",
+ "print(\"Status of uploads: \", upload_job.statuses)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"Option B: Upload to a labeling project using ground truth"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# Uncomment this code when excluding relationships from label import\n## Relationships are not currently supported for label import\n\n# upload_job = lb.LabelImport.create_from_objects(\n# client = client,\n# project_id = project.uid,\n# name=\"label_import_job\"+str(uuid.uuid4()),\n# labels=labels) ## Remove unsupported relationships from the labels list\n\n# print(\"Errors:\", upload_job.errors)\n# print(\"Status of uploads: \", upload_job.statuses)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# Uncomment this code when excluding relationships from label import\n",
+ "## Relationships are not currently supported for label import\n",
+ "\n",
+ "# upload_job = lb.LabelImport.create_from_objects(\n",
+ "# client = client,\n",
+ "# project_id = project.uid,\n",
+ "# name=\"label_import_job\"+str(uuid.uuid4()),\n",
+ "# labels=labels) ## Remove unsupported relationships from the labels list\n",
+ "\n",
+ "# print(\"Errors:\", upload_job.errors)\n",
+ "# print(\"Status of uploads: \", upload_job.statuses)"
+ ]
+ }
+ ],
+ "metadata": {
+ "language_info": {
+ "name": "python"
+ }
- ]
-}
\ No newline at end of file
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/examples/basics/batches.ipynb b/examples/basics/batches.ipynb
index 63d3d7ebd..f111bdce6 100644
--- a/examples/basics/batches.ipynb
+++ b/examples/basics/batches.ipynb
@@ -28,14 +28,15 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Batches\n",
- "https://docs.labelbox.com/docs/batches"
+ "# Batches"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
+ "This notebook covers the basics of batches:\n",
+ "\n",
"* A batch is a collection of data rows.\n",
"* A data row cannot be part of more than one batch in a given project.\n",
"* Batches work for all data types, but there can only be one data type per project.\n",
@@ -45,6 +46,14 @@
"* You can set the priority for each batch."
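The bullets above note that a data row can belong to only one batch per project and that each batch carries its own priority. A hedged sketch of what that looks like in practice, where `urgent_keys` and `backlog_keys` are assumed, non-overlapping lists of global keys (they are not defined in this notebook):

```python
# Hedged sketch: two batches in one project with different priorities.
# A data row may belong to only one batch per project, so the two
# global-key lists must not overlap; batch names must be unique per project.
urgent_batch = project.create_batch(
    "urgent-batch-" + str(uuid.uuid4()),
    global_keys=urgent_keys,    # assumed list of global keys
    priority=1,                 # 1 = highest priority
)
backlog_batch = project.create_batch(
    "backlog-batch-" + str(uuid.uuid4()),
    global_keys=backlog_keys,   # assumed non-overlapping list
    priority=5,                 # 5 = lowest priority
)
```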
]
},
+ {
+ "cell_type": "markdown",
+ "id": "b5641a6d",
+ "metadata": {},
+ "source": [
+ "## Set up"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -469,8 +478,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Clean up \n",
- "Uncomment and run the cell below to optionally delete the batch, dataset, and/or project created in this demo."
+ "## Clean up\n",
+ "Uncomment and run the cell below to optionally delete the Labelbox objects created in this demo."
]
},
{
@@ -479,13 +488,8 @@
"metadata": {},
"outputs": [],
"source": [
- "# Delete Batch\n",
"# batch.delete()\n",
- "\n",
- "# Delete Project\n",
"# project.delete()\n",
- "\n",
- "# Delete DataSet\n",
"# dataset.delete()"
]
}
diff --git a/examples/basics/custom_embeddings.ipynb b/examples/basics/custom_embeddings.ipynb
index 7eaf030bc..71d66d31f 100644
--- a/examples/basics/custom_embeddings.ipynb
+++ b/examples/basics/custom_embeddings.ipynb
@@ -1,18 +1,16 @@
{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {},
"cells": [
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
- "",
- " ",
+ "\n",
+ " \n",
"\n"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"\n",
@@ -24,263 +22,366 @@
"\n",
""
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"# Custom Embeddings\n",
"\n",
"You can improve your data exploration and similarity search experience by adding your own custom embeddings. Labelbox allows you to upload up to 10 different custom embeddings per workspace on any kind of data. You can experiment with different embeddings to power your data selection."
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"# Set up "
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "%pip3 install -q \"labelbox[data]\"",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "%pip install -q --upgrade \"labelbox[data]\""
+ ]
},
{
- "metadata": {},
- "source": "import labelbox as lb\nimport numpy as np\nimport json\nimport uuid\nimport random",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "import labelbox as lb\n",
+ "import numpy as np\n",
+ "import json\n",
+ "import uuid\n",
+ "import random"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"# Replace with your API key"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "API_KEY = \"\"\n",
+ "client = lb.Client(API_KEY)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"# Select data rows"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"- Get images from a Labelbox dataset\n",
"- To improve similarity search, you need to upload custom embeddings to at least 1,000 data rows.\n"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "DATASET_ID = \"\"",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "DATASET_ID = \"\""
+ ]
},
{
- "metadata": {},
- "source": "dataset = client.get_dataset(dataset_id=DATASET_ID)\nexport_task = dataset.export()\nexport_task.wait_till_done()",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [], - "execution_count": null + "source": [ + "dataset = client.get_dataset(dataset_id=DATASET_ID)\n", + "export_task = dataset.export()\n", + "export_task.wait_till_done()" + ] }, { - "metadata": {}, - "source": "data_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n data_row = json.loads(output.json)\n data_rows.append(data_row)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "data_rows = []\n", + "\n", + "\n", + "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", + " data_row = output.json\n", + " data_rows.append(data_row)\n", + "\n", + "\n", + "if export_task.has_errors():\n", + " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", + " stream_handler=lambda error: print(error))\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_buffered_stream(\n", + " stream_type=lb.StreamType.RESULT).start(\n", + " stream_handler=json_stream_handler)" + ] }, { - "metadata": {}, - "source": "data_row_dict = [{\"data_row_id\": dr[\"data_row\"][\"id\"]} for dr in data_rows]\ndata_row_dict = data_row_dict[:\n 1000] # keep the first 1000 examples for the sake of this demo", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "data_row_dict = [{\"data_row_id\": dr[\"data_row\"][\"id\"]} for dr in data_rows]\n", + "data_row_dict = data_row_dict[:\n", + " 1000] # keep the first 1000 examples for the sake of this demo" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Create custom embedding payload " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Generate random vectors for embeddings (max : 2048 dimensions)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "nb_data_rows = len(data_row_dict)\nprint(\"Number of data rows: \", nb_data_rows)\n# Labelbox supports custom embedding vectors of dimension up to 2048\ncustom_embeddings = [list(np.random.random(2048)) for _ in range(nb_data_rows)]", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "nb_data_rows = len(data_row_dict)\n", + "print(\"Number of data rows: \", nb_data_rows)\n", + "# Labelbox supports custom embedding vectors of dimension up to 2048\n", + "custom_embeddings = [list(np.random.random(2048)) for _ in range(nb_data_rows)]" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "List all custom embeddings available in your Labelbox workspace" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "embeddings = client.get_embeddings()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "embeddings = client.get_embeddings()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Choose an existing embedding type or create a new one" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Name of the custom embedding must be unique\nembedding = 
client.create_embedding(\"my_custom_embedding_2048_dimensions\", 2048)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Name of the custom embedding must be unique\n", + "embedding = client.create_embedding(\"my_custom_embedding_2048_dimensions\", 2048)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Create payload" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "The payload should encompass the `key` (data row id or global key) and the new embedding vector data. Note that the `dataset.upsert_data_rows()` operation will only update the values you pass in the payload; all other existing row data will not be modified." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "payload = []\nfor data_row_dict, custom_embedding in zip(data_row_dict, custom_embeddings):\n payload.append({\n \"key\":\n lb.UniqueId(data_row_dict[\"data_row_id\"]),\n \"embeddings\": [{\n \"embedding_id\": embedding.id,\n \"vector\": custom_embedding\n }],\n })\n\nprint(\"payload\", len(payload), payload[:1])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "payload = []\n", + "for data_row_dict, custom_embedding in zip(data_row_dict, custom_embeddings):\n", + " payload.append({\n", + " \"key\":\n", + " lb.UniqueId(data_row_dict[\"data_row_id\"]),\n", + " \"embeddings\": [{\n", + " \"embedding_id\": embedding.id,\n", + " \"vector\": custom_embedding\n", + " }],\n", + " })\n", + "\n", + "print(\"payload\", len(payload), payload[:1])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Upload payload" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Upsert data rows with custom embeddings" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "task = dataset.upsert_data_rows(payload)\ntask.wait_till_done()\nprint(task.errors)\nprint(task.status)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "task = dataset.upsert_data_rows(payload)\n", + "task.wait_till_done()\n", + "print(task.errors)\n", + "print(task.status)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Get the count of imported vectors for a custom embedding" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Count how many data rows have a specific custom embedding (this can take a couple of minutes)\ncount = embedding.get_imported_vector_count()\nprint(count)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Count how many data rows have a specific custom embedding (this can take a couple of minutes)\n", + "count = embedding.get_imported_vector_count()\n", + "print(count)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Delete custom embedding type" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# embedding.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# embedding.delete()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Upload custom embeddings during data row creation" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Create a dataset" - ], - "cell_type": "markdown" + 
] }, { - "metadata": {}, - "source": "# Create a dataset\ndataset_new = client.create_dataset(name=\"data_rows_with_embeddings\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a dataset\n", + "dataset_new = client.create_dataset(name=\"data_rows_with_embeddings\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Fetch an embedding (2048 dimension)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "embedding = client.get_embedding_by_name(\"my_custom_embedding_2048_dimensions\")\nvector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)]", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "embedding = client.get_embedding_by_name(\"my_custom_embedding_2048_dimensions\")\n", + "vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)]" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Upload data rows with embeddings" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "uploads = []\n# Generate data rows\nfor i in range(1, 9):\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n \"TEST-ID-%id\" % uuid.uuid1(),\n \"embeddings\": [{\n \"embedding_id\": embedding.id,\n \"vector\": vector\n }],\n })\n\ntask1 = dataset_new.create_data_rows(uploads)\ntask1.wait_till_done()\nprint(\"ERRORS: \", task1.errors)\nprint(\"RESULTS:\", task1.result)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "uploads = []\n", + "# Generate data rows\n", + "for i in range(1, 9):\n", + " uploads.append({\n", + " \"row_data\":\n", + " f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", + " \"global_key\":\n", + " \"TEST-ID-%id\" % uuid.uuid1(),\n", + " \"embeddings\": [{\n", + " \"embedding_id\": embedding.id,\n", + " \"vector\": vector\n", + " }],\n", + " })\n", + "\n", + "task1 = dataset_new.create_data_rows(uploads)\n", + "task1.wait_till_done()\n", + "print(\"ERRORS: \", task1.errors)\n", + "print(\"RESULTS:\", task1.result)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/basics/data_rows.ipynb b/examples/basics/data_rows.ipynb index a6c87e691..1efb4967f 100644 --- a/examples/basics/data_rows.ipynb +++ b/examples/basics/data_rows.ipynb @@ -1,315 +1,578 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, - "cells": [ - { - "metadata": {}, - "source": [ - "", - " ", - "\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "# Data rows" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "* Data rows are the assets that are being labeled. 
We currently support the following asset types:\n", - " * Image\n", - " * Text\n", - " * Video\n", - " * Geospatial / Tiled Imagery\n", - " * Audio\n", - " * Documents \n", - " * HTML \n", - " * DICOM \n", - " * Conversational\n", - "* A data row cannot exist without belonging to a dataset.\n", - "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "%pip install labelbox -q", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "import labelbox as lb\nimport uuid\nimport json", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# API Key and Client\n", - "Provide a valid api key below in order to properly connect to the Labelbox Client." - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Get data rows from projects" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Pick a project with batches that have data rows with global keys\nPROJECT_ID = \"\"\nproject = client.get_project(PROJECT_ID)\nbatches = list(project.batches())\nprint(batches)\n# This is the same as\n# -> dataset = client.get_dataset(dataset_id)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Fetch data rows from project's batches\n", - "\n", - "Batches will need to be exported from your project as a export parameter. Before you can export from a project you will need an ontology attached." 
- ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "client.enable_experimental = True\n\nbatch_ids = [batch.uid for batch in batches]\n\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"performance_details\": True,\n \"batch_ids\":\n batch_ids, # Include batch ids if you only want to export specific batches, otherwise,\n # you can export all the data without using this parameter\n}\nfilters = {}\n\n# A task is returned, this provides additional information about the status of your task, such as\n# any errors encountered\nexport_task = project.export(params=export_params, filters=filters)\nexport_task.wait_till_done()", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "data_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n data_row = json.loads(output.json)\n data_rows.append(data_row)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "# Get single data row\ndata_row = data_rows[0]\nprint(data_row)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Get labels from the data row" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "print(\"Associated label(s)\", data_row[\"projects\"][project.uid][\"labels\"])\nprint(\"Global key\", data_row[\"data_row\"][\"global_key\"])", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Get data row ids by using global keys" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "global_key = \"\"\ntask = client.get_data_row_ids_for_global_keys([global_key])\nprint(f\"Data row id: {task['results']}\")", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "## Create\n", - "We recommend the following methods to create data rows : `dataset.upsert_data_rows()`, and `dataset.create_data_rows()`, " - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "### Create data rows via `dataset.upsert_data_rows()`" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Create a dataset\ndataset = client.create_dataset(name=\"data_rows_demo_dataset_6\")\n# You can also upload metadata along with your data row\nmdo = client.get_data_row_metadata_ontology()", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "uploads = []\n# Generate data rows\nfor i in range(1, 8):\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n \"TEST-ID-%id\" % uuid.uuid1(),\n ## add metadata (optional)\n \"metadata_fields\": [\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"tag\"].\n uid, # specify the schema id\n value=\"tag_string\", # typed inputs\n ),\n ],\n \"attachments\": [\n {\n \"type\":\n \"IMAGE_OVERLAY\",\n \"value\":\n 
\"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n },\n {\n \"type\": \"RAW_TEXT\",\n \"value\": \"IOWA, Zone 2232, June 2022 [Text string]\",\n },\n {\n \"type\":\n \"TEXT_URL\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n },\n {\n \"type\":\n \"IMAGE\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n },\n {\n \"type\":\n \"VIDEO\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\",\n },\n {\n \"type\":\n \"HTML\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\",\n },\n {\n \"type\":\n \"PDF_URL\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n },\n ],\n })\n\ntask1 = dataset.upsert_data_rows(uploads)\ntask1.wait_till_done()\nprint(\"ERRORS: \", task1.errors)\nprint(\"RESULTS:\", task1.result)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "Create data rows from data in your local path " - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "from PIL import Image\n\n# Create dummy empty jpeg file\nwidth = 400\nheight = 300\ncolor = (255, 255, 255) # White color\nimage = Image.new(\"RGB\", (width, height), color)\n\n# Save the image as a JPEG file\nimage.save(\"dummy.jpg\")\n\nlocal_data_path = \"dummy.jpg\"\n\ndata = {\"row_data\": local_data_path, \"global_key\": str(uuid.uuid4())}\n\ntask3 = dataset.upsert_data_rows([data])\ntask3.wait_till_done()\nprint(\"ERRORS: \", task3.errors)\nprint(\"RESULTS:\", task3.result)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "# You can mix local files with urls when creating data rows\ntask4 = dataset.upsert_data_rows([\n {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\",\n \"global_key\":\n str(uuid.uuid4()),\n },\n {\n \"row_data\": local_data_path,\n \"global_key\": str(uuid.uuid4())\n },\n])\ntask4.wait_till_done()\nprint(\"ERRORS: \", task4.errors)\nprint(\"RESULTS:\", task4.result)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Create data rows via `dataset.create_data_rows()`\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "dataset_2 = client.create_dataset(name=\"data_rows_demo_dataset_3\")", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "uploads = []\n# Generate data rows\nfor i in range(1, 9):\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n \"TEST-ID-%id\" % uuid.uuid1(),\n ## add metadata (optional)\n \"metadata_fields\": [\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"tag\"].\n uid, # specify the schema id\n value=\"tag_string\", # typed inputs\n ),\n ],\n })\n\ntask1_2 = dataset_2.create_data_rows(uploads)\ntask1_2.wait_till_done()\nprint(\"ERRORS: \", task1_2.errors)\nprint(\"RESULTS:\", task1_2.result)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Update\n", - "`dataset.upsert_data_rows()` can also be use to update data rows\n", - "\n", - 
"To update data rows using this method, you need to pass a `key`, which can reference either a global key or a data row ID. Additionally, include any fields that you wish to update along with their new values.\n" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Fetch a data row from the first dataset example\nts = dataset.export()\nts.wait_till_done()\nDATA_ROW_ID = [json.loads(output.json) for output in ts.get_buffered_stream()\n ][0][\"data_row\"][\"id\"]\nGLOBAL_KEY = [json.loads(output.json) for output in ts.get_buffered_stream()\n ][0][\"data_row\"][\"global_key\"]\n\nprint(f\"Pick either a data row id : {DATA_ROW_ID} or global key: {GLOBAL_KEY}\")", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "# Update the global key assodicated with the DATAROW_ID or GLOBAL_KEY, and include a additional metadata\ndata = {\n \"key\":\n lb.UniqueId(DATA_ROW_ID),\n \"global_key\":\n \"NEW-ID-%id\" % uuid.uuid1(),\n \"metadata_fields\": [\n # New metadata\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"captureDateTime\"].uid,\n value=\"2000-01-01 00:00:00\",\n ),\n # Include original metadata otherwise it will be removed\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"tag\"].uid,\n value=\"tag_string\",\n ),\n ],\n}\n\ntask5 = dataset_2.upsert_data_rows([data])\ntask5.wait_till_done()\nprint(\"ERRORS: \", task5.errors)\nprint(\"RESULTS:\", task5.result)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Create a single attachment on an existing data row" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# You can only create one attachment at the time.\nDATA_ROW_ID = \"\"\ndata_row = client.get_data_row(DATA_ROW_ID)\nattachment = data_row.create_attachment(\n attachment_type=\"RAW_TEXT\", attachment_value=\"LABELERS WILL SEE THIS\")", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "Update a recently created attachment " - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "attachment.update(type=\"RAW_TEXT\", value=\"NEW RAW TEXT\")", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Delete" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "* Delete a single data row" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "DATAROW_ID_TO_DELETE = \"\"\ndata_row = client.get_data_row(DATAROW_ID_TO_DELETE)\ndata_row.delete()", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "* Bulk delete data row objects" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Bulk delete a list of data_rows ( limit: 4K data rows per call)\nlb.DataRow.bulk_delete(list(dataset.data_rows()))", - "cell_type": "code", - "outputs": [], - "execution_count": null - } - ] -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data rows" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Data rows are the assets that are being labeled. 
We currently support the following asset types:\n",
+ " * Image\n",
+ " * Text\n",
+ " * Video\n",
+ " * Geospatial / Tiled Imagery\n",
+ " * Audio\n",
+ " * Documents \n",
+ " * HTML \n",
+ " * DICOM \n",
+ " * Conversational\n",
+ "* A data row cannot exist without belonging to a dataset.\n",
+ "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install labelbox -q"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import labelbox as lb\n",
+ "import uuid\n",
+ "import json"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# API Key and Client\n",
+ "Provide a valid API key below in order to properly connect to the Labelbox Client."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add your API key\n",
+ "API_KEY = \"\"\n",
+ "client = lb.Client(api_key=API_KEY)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Get data rows from projects"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Pick a project with batches that have data rows with global keys\n",
+ "PROJECT_ID = \"\"\n",
+ "project = client.get_project(PROJECT_ID)\n",
+ "batches = list(project.batches())\n",
+ "print(batches)\n",
+ "# This is the same as\n",
+ "# -> dataset = client.get_dataset(dataset_id)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Fetch data rows from project's batches\n",
+ "\n",
+ "To export data rows from specific batches, pass the batch IDs to your project export as an export parameter. Before you can export from a project, you will need an ontology attached."
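The note above says an ontology must be attached before you can export from the project. A minimal hedged sketch of that precondition, assuming `ontology` is an existing ontology object (for example from `client.create_ontology`, as used earlier in this patch); `setup_editor` is the attachment call these notebooks use:

```python
# Hedged sketch: attach an ontology to the project before exporting.
# Assumes `ontology` is an lb.Ontology, e.g. from client.create_ontology(...)
# or client.get_ontology("<ontology-id>").
project.setup_editor(ontology)
```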
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f576eb64",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "client.enable_experimental = True\n",
+ "\n",
+ "batch_ids = [batch.uid for batch in batches]\n",
+ "\n",
+ "export_params = {\n",
+ " \"attachments\": True,\n",
+ " \"metadata_fields\": True,\n",
+ " \"data_row_details\": True,\n",
+ " \"project_details\": True,\n",
+ " \"performance_details\": True,\n",
+ " \"batch_ids\": batch_ids, # Include batch ids if you only want to export specific batches, otherwise,\n",
+ " # you can export all the data without using this parameter\n",
+ "}\n",
+ "filters = {}\n",
+ "\n",
+ "# A task is returned; this provides additional information about the status of your task, such as\n",
+ "# any errors encountered\n",
+ "export_task = project.export(params=export_params, filters=filters)\n",
+ "export_task.wait_till_done()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d102d73e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_rows = []\n",
+ "\n",
+ "\n",
+ "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n",
+ " data_row = output.json\n",
+ " data_rows.append(data_row)\n",
+ "\n",
+ "\n",
+ "if export_task.has_errors():\n",
+ " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n",
+ " stream_handler=lambda error: print(error)\n",
+ " )\n",
+ "\n",
+ "if export_task.has_result():\n",
+ " export_json = export_task.get_buffered_stream(\n",
+ " stream_type=lb.StreamType.RESULT\n",
+ " ).start(stream_handler=json_stream_handler)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "57300cab",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Get single data row\n",
+ "data_row = data_rows[0]\n",
+ "print(data_row)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fc079896",
+ "metadata": {},
+ "source": [
+ "### Get labels from the data row"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1ee4b998",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"Associated label(s)\", data_row[\"projects\"][project.uid][\"labels\"])\n",
+ "print(\"Global key\", data_row[\"data_row\"][\"global_key\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "89edfb3c",
+ "metadata": {},
+ "source": [
+ "### Get data row IDs by using global keys"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5e539795",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "global_key = \"\"\n",
+ "task = client.get_data_row_ids_for_global_keys([global_key])\n",
+ "print(f\"Data row id: {task['results']}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aa928b53",
+ "metadata": {},
+ "source": [
+ "## Create\n",
+ "We recommend the following methods to create data rows: `dataset.upsert_data_rows()` and `dataset.create_data_rows()`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7342388c",
+ "metadata": {},
+ "source": [
+ "### Create data rows via `dataset.upsert_data_rows()`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "86e4e2b8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a dataset\n",
+ "dataset = client.create_dataset(name=\"data_rows_demo_dataset_6\")\n",
+ "# You can also upload metadata along with your data row\n",
+ "mdo = client.get_data_row_metadata_ontology()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f52ac7a2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "uploads = []\n",
+ "# Generate data rows\n",
+ "for i in range(1, 8):\n",
+ " uploads.append(\n",
+ " {\n",
+ " \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n",
+ " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1(),\n",
+ " ## add metadata (optional)\n",
+ " \"metadata_fields\": [\n",
+ " lb.DataRowMetadataField(\n",
+ " schema_id=mdo.reserved_by_name[\n",
+ " \"tag\"\n",
+ " ].uid, # specify the schema id\n",
+ " value=\"tag_string\", # typed inputs\n",
+ " ),\n",
+ " ],\n",
+ " \"attachments\": [\n",
+ " {\n",
+ " \"type\": \"IMAGE_OVERLAY\",\n",
+ " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"RAW_TEXT\",\n",
+ " \"value\": \"IOWA, Zone 2232, June 2022 [Text string]\",\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"TEXT_URL\",\n",
+ " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"IMAGE\",\n",
+ " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"VIDEO\",\n",
+ " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\",\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"HTML\",\n",
+ " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\",\n",
+ " },\n",
+ " {\n",
+ " \"type\": \"PDF_URL\",\n",
+ " \"value\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n",
+ " },\n",
+ " ],\n",
+ " }\n",
+ " )\n",
+ "\n",
+ "task1 = dataset.upsert_data_rows(uploads)\n",
+ "task1.wait_till_done()\n",
+ "print(\"ERRORS: \", task1.errors)\n",
+ "print(\"RESULTS:\", task1.result)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7c2c4cdc",
+ "metadata": {},
+ "source": [
+ "Create data rows from data in your local path."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ed492ffc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from PIL import Image\n",
+ "\n",
+ "# Create dummy empty jpeg file\n",
+ "width = 400\n",
+ "height = 300\n",
+ "color = (255, 255, 255) # White color\n",
+ "image = Image.new(\"RGB\", (width, height), color)\n",
+ "\n",
+ "# Save the image as a JPEG file\n",
+ "image.save(\"dummy.jpg\")\n",
+ "\n",
+ "local_data_path = \"dummy.jpg\"\n",
+ "\n",
+ "data = {\"row_data\": local_data_path, \"global_key\": str(uuid.uuid4())}\n",
+ "\n",
+ "task3 = dataset.upsert_data_rows([data])\n",
+ "task3.wait_till_done()\n",
+ "print(\"ERRORS: \", task3.errors)\n",
+ "print(\"RESULTS:\", task3.result)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "33db6b42",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# You can mix local files with URLs when creating data rows\n",
+ "task4 = dataset.upsert_data_rows(\n",
+ " [\n",
+ " {\n",
+ " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\",\n",
+ " \"global_key\": str(uuid.uuid4()),\n",
+ " },\n",
+ " {\"row_data\": local_data_path, \"global_key\": str(uuid.uuid4())},\n",
+ " ]\n",
+ ")\n",
+ "task4.wait_till_done()\n",
+ "print(\"ERRORS: \", task4.errors)\n",
+ "print(\"RESULTS:\", task4.result)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "75a359ae",
+ "metadata": {},
+ "source": [
+ "### Create data rows via `dataset.create_data_rows()`\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"id": "3b02d317", + "metadata": {}, + "outputs": [], + "source": [ + "dataset_2 = client.create_dataset(name=\"data_rows_demo_dataset_3\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71871d7e", + "metadata": {}, + "outputs": [], + "source": [ + "uploads = []\n", + "# Generate data rows\n", + "for i in range(1, 9):\n", + " uploads.append(\n", + " {\n", + " \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", + " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1(),\n", + " ## add metadata (optional)\n", + " \"metadata_fields\": [\n", + " lb.DataRowMetadataField(\n", + " schema_id=mdo.reserved_by_name[\n", + " \"tag\"\n", + " ].uid, # specify the schema id\n", + " value=\"tag_string\", # typed inputs\n", + " ),\n", + " ],\n", + " }\n", + " )\n", + "\n", + "task1_2 = dataset_2.create_data_rows(uploads)\n", + "task1_2.wait_till_done()\n", + "print(\"ERRORS: \", task1_2.errors)\n", + "print(\"RESULTS:\", task1_2.result)" + ] + }, + { + "cell_type": "markdown", + "id": "4ef3f18e", + "metadata": {}, + "source": [ + "### Update\n", + "`dataset.upsert_data_rows()` can also be use to update data rows\n", + "\n", + "To update data rows using this method, you need to pass a `key`, which can reference either a global key or a data row ID. Additionally, include any fields that you wish to update along with their new values.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a41ef462", + "metadata": {}, + "outputs": [], + "source": [ + "# Fetch a data row from the first dataset example\n", + "ts = dataset.export()\n", + "ts.wait_till_done()\n", + "DATA_ROW_ID = [output.json for output in ts.get_buffered_stream()][\n", + " 0\n", + "][\"data_row\"][\"id\"]\n", + "GLOBAL_KEY = [output.json for output in ts.get_buffered_stream()][\n", + " 0\n", + "][\"data_row\"][\"global_key\"]\n", + "\n", + "print(f\"Pick either a data row id : {DATA_ROW_ID} or global key: {GLOBAL_KEY}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86628344", + "metadata": {}, + "outputs": [], + "source": [ + "# Update the global key assodicated with the DATAROW_ID or GLOBAL_KEY, and include a additional metadata\n", + "data = {\n", + " \"key\": lb.UniqueId(DATA_ROW_ID),\n", + " \"global_key\": \"NEW-ID-%id\" % uuid.uuid1(),\n", + " \"metadata_fields\": [\n", + " # New metadata\n", + " lb.DataRowMetadataField(\n", + " schema_id=mdo.reserved_by_name[\"captureDateTime\"].uid,\n", + " value=\"2000-01-01 00:00:00\",\n", + " ),\n", + " # Include original metadata otherwise it will be removed\n", + " lb.DataRowMetadataField(\n", + " schema_id=mdo.reserved_by_name[\"tag\"].uid,\n", + " value=\"tag_string\",\n", + " ),\n", + " ],\n", + "}\n", + "\n", + "task5 = dataset_2.upsert_data_rows([data])\n", + "task5.wait_till_done()\n", + "print(\"ERRORS: \", task5.errors)\n", + "print(\"RESULTS:\", task5.result)" + ] + }, + { + "cell_type": "markdown", + "id": "f9f9cb02", + "metadata": {}, + "source": [ + "### Create a single attachment on an existing data row" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b990d63", + "metadata": {}, + "outputs": [], + "source": [ + "# You can only create one attachment at the time.\n", + "DATA_ROW_ID = \"\"\n", + "data_row = client.get_data_row(DATA_ROW_ID)\n", + "attachment = data_row.create_attachment(\n", + " attachment_type=\"RAW_TEXT\", attachment_value=\"LABELERS WILL SEE THIS\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Update a recently created attachment " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "attachment.update(type=\"RAW_TEXT\", value=\"NEW RAW TEXT\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Delete a single data row" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DATAROW_ID_TO_DELETE = \"\"\n", + "data_row = client.get_data_row(DATAROW_ID_TO_DELETE)\n", + "data_row.delete()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Bulk delete data row objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Bulk delete a list of data_rows ( limit: 4K data rows per call)\n", + "lb.DataRow.bulk_delete(list(dataset.data_rows()))" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/basics/projects.ipynb b/examples/basics/projects.ipynb index c9e5e5813..1201c9b56 100644 --- a/examples/basics/projects.ipynb +++ b/examples/basics/projects.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 5, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,17 +22,18 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ - "# Projects" - ], - "cell_type": "markdown" + "# Projects\n", + "This notebook covers the basics of projects:" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "* A project can be thought of as a specific labeling task on a set of labels\n", @@ -42,275 +41,440 @@ "* Each project has an ontology which defines the types of annotations supported during the labeling process\n", "**Note that there is a lot of advanced usage that is not covered in this notebook. See examples/project_configuration/project_setup.ipynb for those functions**\n", "* Also note that deprecated functions are not explained here." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", + "id": "864da4c5", "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", + "source": [ + "## Set up" + ] + }, + { "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q --upgrade \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport labelbox.types as lb_types\nfrom labelbox.schema.conflict_resolution_strategy import (\n ConflictResolutionStrategy,)\nimport uuid", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import labelbox.types as lb_types\n", + "from labelbox.schema.conflict_resolution_strategy import (\n", + " ConflictResolutionStrategy,)\n", + "import uuid" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ - "# API Key and Client\n", - "Provide a valid api key below in order to properly connect to the Labelbox Client." - ], - "cell_type": "markdown" + "## API key and client\n", + "Provide a valid API key below to connect to the Labelbox client properly. 
For more information, please review the [Create API key](https://docs.labelbox.com/reference/create-api-key) guide." + ] }, { - "metadata": {}, - "source": "# Add your API key\nAPI_KEY = \"\"\n# To get your API key go to: Workspace settings -> API -> Create API Key\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = None\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Create a project\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Creates an empty project\nproject = client.create_project(\n name=\"my-test-project\",\n description=\"a description\",\n media_type=lb.MediaType.Image,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Creates an empty project\n", + "project = client.create_project(\n", + " name=\"my-test-project\",\n", + " description=\"a description\",\n", + " media_type=lb.MediaType.Image,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Create a dataset with data rows" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "dataset = client.create_dataset(name=\"project-demo-dataset\")\nglobal_keys = []\nuploads = []\n# Generate data rows\nfor i in range(1, 9):\n gb_key = \"TEST-ID-%id\" % uuid.uuid1()\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n gb_key,\n })\n global_keys.append(gb_key)\n\ntask = dataset.create_data_rows(uploads)\ntask.wait_till_done()\nprint(\"ERRORS: \", task.errors)\nprint(\"RESULT URL: \", task.result_url)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "dataset = client.create_dataset(name=\"project-demo-dataset\")\n", + "global_keys = []\n", + "uploads = []\n", + "# Generate data rows\n", + "for i in range(1, 9):\n", + " gb_key = \"TEST-ID-%id\" % uuid.uuid1()\n", + " uploads.append({\n", + " \"row_data\":\n", + " f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", + " \"global_key\":\n", + " gb_key,\n", + " })\n", + " global_keys.append(gb_key)\n", + "\n", + "task = dataset.create_data_rows(uploads)\n", + "task.wait_till_done()\n", + "print(\"ERRORS: \", task.errors)\n", + "print(\"RESULT URL: \", task.result_url)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Add data rows to a project \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"project-demo\", # each batch in a project must have a unique name\n global_keys=\n global_keys, # paginated collection of data row objects, list of data row ids or global keys\n priority=1, # priority between 1(highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"project-demo\", # each batch in a project must have a unique name\n", + " global_keys=\n", + " global_keys, # paginated collection of data row objects, list of data row ids or global keys\n", + " priority=1, # priority between 1(highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Create tags and assign 
them to a project\n",
"In this section, we are creating a resource tag in the organization and associating it with a project. Then we are listing the tags attached to the project.\n"
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Create a tag"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# Get the organization\norganization = client.get_organization()\n\ntag = organization.create_resource_tag({\n \"text\": \"new-tag-name\",\n \"color\": \"4ed2f9\"\n})",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# Get the organization\n",
+ "organization = client.get_organization()\n",
+ "\n",
+ "tag = organization.create_resource_tag({\n",
+ "    \"text\": \"new-tag-name\",\n",
+ "    \"color\": \"4ed2f9\"\n",
+ "})"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Assign the tag to a project"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "tags = project.update_project_resource_tags([tag.uid])",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "tags = project.update_project_resource_tags([tag.uid])"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Get project tags"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "tags = project.get_resource_tags()",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "tags = project.get_resource_tags()"
+ ]
},
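+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick check, the sketch below prints the tags now attached to the project; it assumes each returned tag exposes `text` and `uid` fields (matching the payload used at creation)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: list the project's resource tags\n",
+ "for tag in tags:\n",
+ "    print(tag.text, tag.uid)"
+ ]
+ },
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Attach ontology and label data rows\n",
"\n",
"In this section, we are creating an ontology to attach to a project and creating labels to import as ground truths. We need this setup to demonstrate other methods later in the demo. For more information, please reference our [Ontology](https://docs.labelbox.com/reference/ontology) and [Import Image Annotation](https://docs.labelbox.com/reference/import-image-annotations) development guides."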
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Create your ontology" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create normalized json with a radio classification\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n ])\n# Creating an ontology\nontology = client.create_ontology(\"test-ontology\", ontology_builder.asdict())", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null - }, - { + "source": [ + "# Create normalized json with a radio classification\n", + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " ])\n", + "# Creating an ontology\n", + "ontology = client.create_ontology(\"test-ontology\", ontology_builder.asdict())" + ] + }, + { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Attach ontology to project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Create labels and upload them to project as ground truths" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create labels\nlabels = []\nfor global_key in global_keys:\n labels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n # Create radio classification annotation for labels\n lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n )\n ],\n ))\n\n# Upload labels for the data rows in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job.wait_until_done()\n\nprint(f\"Errors: {upload_job.errors}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create labels\n", + "labels = []\n", + "for global_key in global_keys:\n", + " labels.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " # Create radio classification annotation for labels\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\")),\n", + " )\n", + " ],\n", + " ))\n", + "\n", + "# Upload labels for the data rows in project\n", + "upload_job = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"label_import_job\" + str(uuid.uuid4()),\n", + " labels=labels,\n", + ")\n", + "\n", + "upload_job.wait_until_done()\n", + "\n", + "print(f\"Errors: {upload_job.errors}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### 
Move data rows in project to different task queues"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# Get list of task queues for project\ntask_queues = project.task_queues()\n\nfor task_queue in task_queues:\n print(task_queue)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# Get list of task queues for project\n",
+ "task_queues = project.task_queues()\n",
+ "\n",
+ "for task_queue in task_queues:\n",
+ "    print(task_queue)"
+ ]
},
{
- "metadata": {},
- "source": "project.move_data_rows_to_task_queue(\n data_row_ids=lb.GlobalKeys(global_keys), # Provide a list of global keys\n task_queue_id=task_queues[2].\n uid, # Passing None moves data rows to \"Done\" task queue\n)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "project.move_data_rows_to_task_queue(\n",
+ "    data_row_ids=lb.GlobalKeys(global_keys),  # Provide a list of global keys\n",
+ "    task_queue_id=task_queues[2].\n",
+ "    uid,  # Passing None moves data rows to \"Done\" task queue\n",
+ ")"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Fetch project configuration"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# Note the project is not fully setup many of the fields will be empty.\nprint(\"Project is not setup yet:\", project.setup_complete is None)\nprint(\"Project name:\", project.name)\nprint(\"Project description:\", project.description)\nprint(\"Media Type:\", project.media_type)\nbatches = [b for b in project.batches()]\nprint(\"Project Batches\", batches)\nprint(\"Ontology:\", project.ontology())",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# Note: the project is not fully set up, so many of the fields will be empty.\n",
+ "print(\"Project is not set up yet:\", project.setup_complete is None)\n",
+ "print(\"Project name:\", project.name)\n",
+ "print(\"Project description:\", project.description)\n",
+ "print(\"Media Type:\", project.media_type)\n",
+ "batches = [b for b in project.batches()]\n",
+ "print(\"Project Batches\", batches)\n",
+ "print(\"Ontology:\", project.ontology())"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Return number of labeled data rows"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "print(\"Number of labels:\", project.get_label_count())",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "print(\"Number of labels:\", project.get_label_count())"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Get project overview"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# Returns only the number of data rows and issues\noverview = project.get_overview()\n\n# Returns the number of data rows, issues and the details of the in_review queue\ndetailed_overview = project.get_overview(details=True)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# Returns only the number of data rows and issues\n",
+ "overview = project.get_overview()\n",
+ "\n",
+ "# Returns the number of data rows, issues and the details of the in_review queue\n",
+ "detailed_overview = project.get_overview(details=True)"
+ ]
},
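+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Both overview objects can be printed directly; the minimal sketch below simply dumps them so you can see the available counts (the exact fields depend on your SDK version, so treat them as assumptions)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: inspect the overview objects returned above\n",
+ "print(overview)\n",
+ "print(detailed_overview)"
+ ]
+ },
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Duplicate a project\n",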
"Please see the section [Duplicate a project](https://docs.labelbox.com/docs/create-a-project#duplicate-a-project) to have the scope of the method." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "destination_project = project.clone()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "destination_project = project.clone()" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Copy labels and data rows from one project to a different project\n", "In the below steps we will be copying data rows with their corresponding labels from one project to a different project with a similar ontology. First, we must set up a new project with a ontology that matches the tooling of our source project ontology." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create an empty destination project\ndestination_project = client.create_project(\n name=\"destination-test-project\",\n description=\"a description\",\n media_type=lb.MediaType.Image,\n)\n\n# Create ontology and attach to destination project\ndestination_ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"destination_radio_question\",\n options=[\n lb.Option(value=\"destination_first_radio_answer\"),\n lb.Option(value=\"destination_second_radio_answer\"),\n ],\n ),\n ])\n\ndestination_ontology = client.create_ontology(\"dest-test-ontology\",\n ontology_builder.asdict())\n\ndestination_project.setup_editor(destination_ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create an empty destination project\n", + "destination_project = client.create_project(\n", + " name=\"destination-test-project\",\n", + " description=\"a description\",\n", + " media_type=lb.MediaType.Image,\n", + ")\n", + "\n", + "# Create ontology and attach to destination project\n", + "destination_ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"destination_radio_question\",\n", + " options=[\n", + " lb.Option(value=\"destination_first_radio_answer\"),\n", + " lb.Option(value=\"destination_second_radio_answer\"),\n", + " ],\n", + " ),\n", + " ])\n", + "\n", + "destination_ontology = client.create_ontology(\"dest-test-ontology\",\n", + " ontology_builder.asdict())\n", + "\n", + "destination_project.setup_editor(destination_ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Copy data rows and labels\n", @@ -335,36 +499,100 @@ " * ConflictResolutionStrategy.OverrideWithAnnotations\n", "* `param batch_priority`\n", " - The priority of the batch." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Get ontology dictionary to obtain featureSchemaIds\nsource_ontology_normalized = ontology.normalized\ndestination_ontology_normalized = destination_ontology.normalized\n\nANNOTATION_ONTOLOGY_MAPPING = {\n source_ontology_normalized[\"classifications\"][0][\"featureSchemaId\"]:\n destination_ontology_normalized[\"classifications\"][0]\n [\"featureSchemaId\"], # Classification featureSchemaID\n source_ontology_normalized[\"classifications\"][0][\"options\"][0][\"featureSchemaId\"]:\n destination_ontology_normalized[\"classifications\"][0][\"options\"][0]\n [\"featureSchemaId\"], # Different Classification Answer featureSchemaIDs\n source_ontology_normalized[\"classifications\"][0][\"options\"][1][\"featureSchemaId\"]:\n destination_ontology_normalized[\"classifications\"][0][\"options\"][1]\n [\"featureSchemaId\"],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Get ontology dictionary to obtain featureSchemaIds\n", + "source_ontology_normalized = ontology.normalized\n", + "destination_ontology_normalized = destination_ontology.normalized\n", + "\n", + "ANNOTATION_ONTOLOGY_MAPPING = {\n", + " source_ontology_normalized[\"classifications\"][0][\"featureSchemaId\"]:\n", + " destination_ontology_normalized[\"classifications\"][0]\n", + " [\"featureSchemaId\"], # Classification featureSchemaID\n", + " source_ontology_normalized[\"classifications\"][0][\"options\"][0][\"featureSchemaId\"]:\n", + " destination_ontology_normalized[\"classifications\"][0][\"options\"][0]\n", + " [\"featureSchemaId\"], # Different Classification Answer featureSchemaIDs\n", + " source_ontology_normalized[\"classifications\"][0][\"options\"][1][\"featureSchemaId\"]:\n", + " destination_ontology_normalized[\"classifications\"][0][\"options\"][1]\n", + " [\"featureSchemaId\"],\n", + "}" + ] }, { - "metadata": {}, - "source": "send_to_annotate_params = {\n \"source_project_id\":\n project.uid,\n \"annotations_ontology_mapping\":\n ANNOTATION_ONTOLOGY_MAPPING,\n \"exclude_data_rows_in_project\":\n False,\n \"override_existing_annotations_rule\":\n ConflictResolutionStrategy.OverrideWithPredictions,\n \"batch_priority\":\n 5,\n}\n\n# Get task id to workflow you want to send data rows. 
If sent to initial labeling queue, labels will be pre-labels.\nqueue_id = [\n queue.uid\n for queue in destination_project.task_queues()\n if queue.queue_type == \"MANUAL_REVIEW_QUEUE\"\n][0]\n\ntask = client.send_to_annotate_from_catalog(\n destination_project_id=destination_project.uid,\n task_queue_id=\n queue_id, # ID of workflow task, set ID to None if you want to send data rows with labels to the Done queue.\n batch_name=\"Prediction Import Demo Batch\",\n data_rows=lb.GlobalKeys(\n global_keys # Provide a list of global keys from source project\n ),\n params=send_to_annotate_params,\n)\n\ntask.wait_till_done()\n\nprint(f\"Errors: {task.errors}\")",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "send_to_annotate_params = {\n",
+ "    \"source_project_id\":\n",
+ "        project.uid,\n",
+ "    \"annotations_ontology_mapping\":\n",
+ "        ANNOTATION_ONTOLOGY_MAPPING,\n",
+ "    \"exclude_data_rows_in_project\":\n",
+ "        False,\n",
+ "    \"override_existing_annotations_rule\":\n",
+ "        ConflictResolutionStrategy.OverrideWithPredictions,\n",
+ "    \"batch_priority\":\n",
+ "        5,\n",
+ "}\n",
+ "\n",
+ "# Get the ID of the task queue you want to send data rows to. If sent to the initial labeling queue, labels will be pre-labels.\n",
+ "queue_id = [\n",
+ "    queue.uid\n",
+ "    for queue in destination_project.task_queues()\n",
+ "    if queue.queue_type == \"MANUAL_REVIEW_QUEUE\"\n",
+ "][0]\n",
+ "\n",
+ "task = client.send_to_annotate_from_catalog(\n",
+ "    destination_project_id=destination_project.uid,\n",
+ "    task_queue_id=\n",
+ "    queue_id,  # ID of the target task queue; set to None to send data rows with labels to the Done queue.\n",
+ "    batch_name=\"Prediction Import Demo Batch\",\n",
+ "    data_rows=lb.GlobalKeys(\n",
+ "        global_keys  # Provide a list of global keys from source project\n",
+ "    ),\n",
+ "    params=send_to_annotate_params,\n",
+ ")\n",
+ "\n",
+ "task.wait_till_done()\n",
+ "\n",
+ "print(f\"Errors: {task.errors}\")"
+ ]
},
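+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick check, the sketch below lists the destination project's batches; you should see the \"Prediction Import Demo Batch\" created by the call above."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: confirm the batch arrived in the destination project\n",
+ "for batch in destination_project.batches():\n",
+ "    print(batch.name)"
+ ]
+ },
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
- "### Clean Up"
- ],
- "cell_type": "markdown"
+ "## Clean up\n",
+ "Uncomment and run the cell below to delete the Labelbox objects created in this demo (optional)."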
+ ] }, { - "metadata": {}, - "source": "# project.delete()\n# destination_project.delete()\n# dataset.delete()\n# client.delete_unused_ontology(destination_ontology.uid)\n# client.delete_unused_ontology(ontology.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# destination_project.delete()\n", + "# dataset.delete()\n", + "# client.delete_unused_ontology(destination_ontology.uid)\n", + "# client.delete_unused_ontology(ontology.uid)" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/exports/composite_mask_export.ipynb b/examples/exports/composite_mask_export.ipynb index 60e2cb0c7..679f82624 100644 --- a/examples/exports/composite_mask_export.ipynb +++ b/examples/exports/composite_mask_export.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 2, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Export composite masks \n", @@ -36,53 +34,66 @@ "Composite masks are a combination of mask instances grouped in a single mask URL. \n", "\n", "The purpose of this demo is to demonstrate how to transition from exporting single masks to exporting composite masks. " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport urllib.request\nfrom PIL import Image\nimport json", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import urllib.request\n", + "from PIL import Image\n", + "import json" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## API Key and Client\n", "See the developer guide for [creating an API key](https://docs.labelbox.com/reference/create-api-key)." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)\nclient.enable_experimental = (\n True ## This is required if using the export() streamable method\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)\n", + "client.enable_experimental = (\n", + " True ## This is required if using the export() streamable method\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Key differences between single mask instance and composite mask." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Composite masks\n", @@ -117,17 +128,31 @@ " }\n", "```\n", "3. A unique RGB color is assigned to each mask instance. 
The example below shows a composite mask of a label, and while it contains all mask instances, only the RGB color associated with this particular annotation will be filled in under the ```color_rgb``` field." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Example on how to fetch a composite mask\n# The mask here shows all the mask instances associated with a label\ntask_id = \"\"\ncomposite_mask_id = \"\"\n\nmask_url = f\"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}/index/1\"\nreq = urllib.request.Request(mask_url, headers=client.headers)\nimage = Image.open(urllib.request.urlopen(req))\nw, h = image.size\nnew_w = w // 4\nnew_h = h // 4\n\nimage.resize((new_w, new_h), Image.BICUBIC)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Example on how to fetch a composite mask\n", + "# The mask here shows all the mask instances associated with a label\n", + "task_id = \"\"\n", + "composite_mask_id = \"\"\n", + "\n", + "mask_url = f\"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}/index/1\"\n", + "req = urllib.request.Request(mask_url, headers=client.headers)\n", + "image = Image.open(urllib.request.urlopen(req))\n", + "w, h = image.size\n", + "new_w = w // 4\n", + "new_h = h // 4\n", + "\n", + "image.resize((new_w, new_h), Image.BICUBIC)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Here's an example of an entry featuring a composite mask (see image above) containing the mask instance's RGB color uniquely associated with the annotation.\n", @@ -151,17 +176,17 @@ " }\n", "```\n", "- rgb(123,103,152) = Purple\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "---" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Single mask instance:\n", @@ -177,79 +202,193 @@ "\n", "```\n", "3. 
RGB color is not present" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Create an export from an Image project with mask annotations\n", "To better showcase composite masks, make sure you have different mask tools and mask annotations in your project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Insert the project ID of the project from which you wish to export data rows.\nPROJECT_ID = \"\"\nproject = client.get_project(PROJECT_ID)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Insert the project ID of the project from which you wish to export data rows.\n", + "PROJECT_ID = \"\"\n", + "project = client.get_project(PROJECT_ID)" + ] }, { - "metadata": {}, - "source": "export_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n}\n\nfilters = {}\n\n# export() is the streamable option of exports V2, for more information please visit our documentation:\n# https://docs.labelbox.com/reference/label-export#export-v2-methods\n\nexport_task = project.export(params=export_params, filters=filters)\nexport_task.wait_till_done()\n\nif export_task.has_result():\n print(export_task.result)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + "}\n", + "\n", + "filters = {}\n", + "\n", + "# export() is the streamable option of exports V2, for more information please visit our documentation:\n", + "# https://docs.labelbox.com/reference/label-export#export-v2-methods\n", + "\n", + "export_task = project.export(params=export_params, filters=filters)\n", + "export_task.wait_till_done()\n", + "\n", + "if export_task.has_result():\n", + " print(export_task.result)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Get all the ```color_rgb``` associated with annotations that are using a specific mask tool " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "stream = export_task.get_buffered_stream()\n\nmask_tool_rgb_mapping = {}\n\nfor output in stream:\n # Parse the JSON string from the output\n output_json = json.loads(output.json)\n\n # Get the labels for the specified project ID or an empty list if the project ID is not found\n project_labels = (output_json[\"projects\"].get(PROJECT_ID,\n {}).get(\"labels\", []))\n\n # Iterate through each label\n for label in project_labels:\n # Get the list of annotations (objects) for the label\n annotations = label[\"annotations\"].get(\"objects\", [])\n\n # Iterate through each annotation\n for annotation in annotations:\n # Check if the annotation is of type \"ImageSegmentationMask\"\n if annotation.get(\"annotation_kind\") == \"ImageSegmentationMask\":\n # Add the color RGB information to the mapping dictionary\n mask_tool_rgb_mapping.setdefault(annotation[\"name\"], []).append(\n annotation[\"composite_mask\"][\"color_rgb\"])\n\nprint(mask_tool_rgb_mapping)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + 
"source": [ + "stream = export_task.get_buffered_stream()\n", + "\n", + "mask_tool_rgb_mapping = {}\n", + "\n", + "for output in stream:\n", + " # Parse the JSON string from the output\n", + " output_json = output.json\n", + "\n", + " # Get the labels for the specified project ID or an empty list if the project ID is not found\n", + " project_labels = (output_json[\"projects\"].get(PROJECT_ID,\n", + " {}).get(\"labels\", []))\n", + "\n", + " # Iterate through each label\n", + " for label in project_labels:\n", + " # Get the list of annotations (objects) for the label\n", + " annotations = label[\"annotations\"].get(\"objects\", [])\n", + "\n", + " # Iterate through each annotation\n", + " for annotation in annotations:\n", + " # Check if the annotation is of type \"ImageSegmentationMask\"\n", + " if annotation.get(\"annotation_kind\") == \"ImageSegmentationMask\":\n", + " # Add the color RGB information to the mapping dictionary\n", + " mask_tool_rgb_mapping.setdefault(annotation[\"name\"], []).append(\n", + " annotation[\"composite_mask\"][\"color_rgb\"])\n", + "\n", + "print(mask_tool_rgb_mapping)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Create an export from a Video project with mask annotations " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "VIDEO_PROJECT_ID = \"\"\nproject_video = client.get_project(VIDEO_PROJECT_ID)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "VIDEO_PROJECT_ID = \"\"\n", + "project_video = client.get_project(VIDEO_PROJECT_ID)" + ] }, { - "metadata": {}, - "source": "export_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n}\n\nfilters = {}\n\n# export() is the streamable option of exports V2, for more information please visit our documentation:\n# https://docs.labelbox.com/reference/label-export#export-v2-methods\n\nexport_task_video = project_video.export(params=export_params, filters=filters)\nexport_task_video.wait_till_done()\n\nif export_task_video.has_result():\n print(export_task_video.result)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "export_params = {\n", + " \"attachments\": True,\n", + " \"metadata_fields\": True,\n", + " \"data_row_details\": True,\n", + " \"project_details\": True,\n", + " \"label_details\": True,\n", + " \"performance_details\": True,\n", + " \"interpolated_frames\": True,\n", + "}\n", + "\n", + "filters = {}\n", + "\n", + "# export() is the streamable option of exports V2, for more information please visit our documentation:\n", + "# https://docs.labelbox.com/reference/label-export#export-v2-methods\n", + "\n", + "export_task_video = project_video.export(params=export_params, filters=filters)\n", + "export_task_video.wait_till_done()\n", + "\n", + "if export_task_video.has_result():\n", + " print(export_task_video.result)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Get all the ```color_rgb``` associated with annotations that are using a specific mask tool from each frame" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "tools_frames_color = {}\nstream = export_task_video.get_buffered_stream()\n\n# Iterate over each output in the stream\nfor output in stream:\n output_json = json.loads(output.json)\n\n # 
Iterate over the labels in the specific project\n for dr in output_json[\"projects\"][VIDEO_PROJECT_ID][\"labels\"]:\n frames_data = dr[\"annotations\"][\"frames\"]\n\n # Iterate over each frame in the frames data\n for frame_key, frame_value in frames_data.items():\n\n # Iterate over each annotation in the frame\n for annotation_key, annotation_value in frame_value.items():\n if \"objects\" in annotation_key and annotation_value.values():\n\n # Iterate over each object in the annotation\n for object_key, object_value in annotation_value.items():\n if (object_value[\"annotation_kind\"] ==\n \"VideoSegmentationMask\"):\n # Update tools_frames_color with object information\n tools_frames_color.setdefault(\n object_value[\"name\"], []).append({\n frame_key:\n object_value[\"composite_mask\"]\n [\"color_rgb\"]\n })\n\nprint(tools_frames_color)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "tools_frames_color = {}\n", + "stream = export_task_video.get_buffered_stream()\n", + "\n", + "# Iterate over each output in the stream\n", + "for output in stream:\n", + " output_json = output.json\n", + "\n", + " # Iterate over the labels in the specific project\n", + " for dr in output_json[\"projects\"][VIDEO_PROJECT_ID][\"labels\"]:\n", + " frames_data = dr[\"annotations\"][\"frames\"]\n", + "\n", + " # Iterate over each frame in the frames data\n", + " for frame_key, frame_value in frames_data.items():\n", + "\n", + " # Iterate over each annotation in the frame\n", + " for annotation_key, annotation_value in frame_value.items():\n", + " if \"objects\" in annotation_key and annotation_value.values():\n", + "\n", + " # Iterate over each object in the annotation\n", + " for object_key, object_value in annotation_value.items():\n", + " if (object_value[\"annotation_kind\"] ==\n", + " \"VideoSegmentationMask\"):\n", + " # Update tools_frames_color with object information\n", + " tools_frames_color.setdefault(\n", + " object_value[\"name\"], []).append({\n", + " frame_key:\n", + " object_value[\"composite_mask\"]\n", + " [\"color_rgb\"]\n", + " })\n", + "\n", + "print(tools_frames_color)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/model_experiments/custom_metrics_demo.ipynb b/examples/model_experiments/custom_metrics_demo.ipynb index ed8516d2a..670073141 100644 --- a/examples/model_experiments/custom_metrics_demo.ipynb +++ b/examples/model_experiments/custom_metrics_demo.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,250 +22,1547 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ - "----\n", - "\n", "# Model Diagnostics - Custom Metrics Demo\n", "\n", "* Measuring model quality is critical to efficiently building models. It is important that the metrics used to measure model quality closely align with the business objectives for the model. Otherwise, slight changes in model quality, as they related to these core objectives, are lost to noise. Custom metrics enables users to measure model quality in terms of their exact business goals. 
By incorporating custom metrics into workflows, users can:\n", " * Iterate faster\n", " * Measure and report on model quality\n", " * Understand marginal value of additional labels and modeling efforts\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ - "## Environment setup" - ], - "cell_type": "markdown" + "## Set up" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q --upgrade \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "import requests\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ - "## Replace with your API Key\n", - "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ], - "cell_type": "markdown" + "## API key and client\n", + "Provide a valid API key below to connect to the Labelbox client properly. For more information, please review the [Create API key](https://docs.labelbox.com/reference/create-api-key) guide." + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = None\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ - "### Classification: Radio (single-choice)" - ], - "cell_type": "markdown" + "## Classifications" + ] }, { + "cell_type": "markdown", "metadata": {}, - "source": "radio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.1\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n )),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.1\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n}", + "source": [ + "### Radio (single-choice)" + ] + }, + { "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": 
\"iou\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "# NDJSON\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ - "### Classification: checklist (multi-choice)" - ], - "cell_type": "markdown" + "#### Checklist (multi-choice)" + ] }, { - "metadata": {}, - "source": "checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n lb_types.ClassificationAnswer(\n name=\"second_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n ]),\n)\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n {\n \"name\":\n \"second_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + 
"checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " ),\n", + " lb_types.ClassificationAnswer(\n", + " name=\"second_checklist_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " ),\n", + " ]),\n", + ")\n", + "checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"checklist_question\",\n", + " \"answer\": [\n", + " {\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + " {\n", + " \"name\":\n", + " \"second_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Nested radio and checklist" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "nested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n classifications=[\n 
lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n )),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\":\n \"nested_radio_question\",\n \"confidence\":\n 0.5,\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n }],\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"confidence\":\n 0.5,\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\":\n \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n 
\"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n },\n }],\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332,\n", + " },\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_radio_question\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + " }],\n", + "}\n", + "\n", + "nested_checklist_prediction = 
lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332,\n", + " },\n", + " ],\n", + " )\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332,\n", + " },\n", + " ],\n", + " },\n", + " }],\n", + " }],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding Box" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "bbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 
0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"bbox\": {\n \"top\": 977,\n \"left\": 1690,\n \"height\": 330,\n \"width\": 225\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "bbox_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", + " end=lb_types.Point(x=1915,\n", + " y=1307), # x= left + width , y = top + height\n", + " ),\n", + ")\n", + "\n", + "bbox_prediction_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"confidence\": 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"bbox\": {\n", + " \"top\": 977,\n", + " \"left\": 1690,\n", + " \"height\": 330,\n", + " \"width\": 225\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box with nested classification " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n 
confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n )),\n )\n ],\n)\n## NDJSON\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n }],\n \"bbox\": {\n \"top\": 933,\n \"left\": 541,\n \"height\": 191,\n \"width\": 330\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.2\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.3\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 23\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", + " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.2\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.3\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 23\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332,\n", + " },\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + ")\n", + "## NDJSON\n", + "bbox_with_radio_subclass_prediction_ndjson = {\n", + " \"name\": \"bbox_with_radio_subclass\",\n", + " \"confidence\": 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.2\n", + " },\n", + " {\n", + " \"name\": 
\"precision\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.3\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 23\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.2\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.1\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.3\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 23\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " },\n", + " }],\n", + " \"bbox\": {\n", + " \"top\": 933,\n", + " \"left\": 541,\n", + " \"height\": 191,\n", + " \"width\": 330\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Polygon" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Anotation\npolygon_prediction = lb_types.ObjectAnnotation(\n name=\"polygon\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\npolygon_prediction_ndjson = {\n \"name\":\n \"polygon\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"polygon\": [\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n {\n \"x\": 2278.306,\n \"y\": 256.885\n },\n {\n \"x\": 2428.197,\n \"y\": 200.437\n },\n {\n \"x\": 2560.0,\n \"y\": 335.419\n },\n {\n \"x\": 2557.386,\n \"y\": 503.165\n },\n {\n \"x\": 2320.596,\n \"y\": 503.103\n },\n {\n \"x\": 2156.083,\n \"y\": 628.943\n },\n {\n \"x\": 2161.111,\n \"y\": 785.519\n },\n {\n \"x\": 2002.115,\n \"y\": 894.647\n },\n {\n \"x\": 1838.456,\n \"y\": 877.874\n },\n {\n \"x\": 1436.53,\n \"y\": 874.636\n },\n {\n \"x\": 1411.403,\n \"y\": 758.579\n },\n {\n \"x\": 1353.853,\n \"y\": 751.74\n },\n {\n \"x\": 1345.264,\n \"y\": 453.461\n },\n {\n \"x\": 1426.011,\n \"y\": 421.129\n 
},\n        {\n            \"x\": 1489.581,\n            \"y\": 183.934\n        },\n    ],\n}",
 "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
 "outputs": [],
- "execution_count": null
+ "source": [
+ "# Python Annotation\n",
+ "polygon_prediction = lb_types.ObjectAnnotation(\n",
+ "    name=\"polygon\",\n",
+ "    confidence=0.5,\n",
+ "    custom_metrics=[\n",
+ "        {\n",
+ "            \"name\": \"iou\",\n",
+ "            \"value\": 0.5\n",
+ "        },\n",
+ "        {\n",
+ "            \"name\": \"f1\",\n",
+ "            \"value\": 0.33\n",
+ "        },\n",
+ "        {\n",
+ "            \"name\": \"precision\",\n",
+ "            \"value\": 0.55\n",
+ "        },\n",
+ "        {\n",
+ "            \"name\": \"recall\",\n",
+ "            \"value\": 0.33\n",
+ "        },\n",
+ "        {\n",
+ "            \"name\": \"tagsCount\",\n",
+ "            \"value\": 43\n",
+ "        },\n",
+ "        {\n",
+ "            \"name\": \"metric_with_a_very_long_name\",\n",
+ "            \"value\": 0.334332\n",
+ "        },\n",
+ "    ],\n",
+ "    value=lb_types.Polygon(points=[\n",
+ "        lb_types.Point(x=1489.581, y=183.934),\n",
+ "        lb_types.Point(x=2278.306, y=256.885),\n",
+ "        lb_types.Point(x=2428.197, y=200.437),\n",
+ "        lb_types.Point(x=2560.0, y=335.419),\n",
+ "        lb_types.Point(x=2557.386, y=503.165),\n",
+ "        lb_types.Point(x=2320.596, y=503.103),\n",
+ "        lb_types.Point(x=2156.083, y=628.943),\n",
+ "        lb_types.Point(x=2161.111, y=785.519),\n",
+ "        lb_types.Point(x=2002.115, y=894.647),\n",
+ "        lb_types.Point(x=1838.456, y=877.874),\n",
+ "        lb_types.Point(x=1436.53, y=874.636),\n",
+ "        lb_types.Point(x=1411.403, y=758.579),\n",
+ "        lb_types.Point(x=1353.853, y=751.74),\n",
+ "        lb_types.Point(x=1345.264, y=453.461),\n",
+ "        lb_types.Point(x=1426.011, y=421.129),\n",
+ "    ]),\n",
+ ")\n",
+ "\n",
+ "polygon_prediction_ndjson = {\n",
+ "    \"name\":\n",
+ "        \"polygon\",\n",
+ "    \"confidence\":\n",
+ "        0.5,\n",
+ "    \"customMetrics\": [\n",
+ "        {\n",
+ "            \"name\": \"iou\",\n",
+ "            \"value\": 0.5\n",
+ "        },\n",
+ "        {\n",
+ "            \"name\": \"f1\",\n",
+ "            \"value\": 0.33\n",
+ "        },\n",
+ "        {\n",
+ "            \"name\": \"precision\",\n",
+ "            \"value\": 0.55\n",
+ "        },\n",
+ "        {\n",
+ "            \"name\": \"recall\",\n",
+ "            \"value\": 0.33\n",
+ "        },\n",
+ "        {\n",
+ "            \"name\": \"tagsCount\",\n",
+ "            \"value\": 43\n",
+ "        },\n",
+ "        {\n",
+ "            \"name\": \"metric_with_a_very_long_name\",\n",
+ "            \"value\": 0.334332\n",
+ "        },\n",
+ "    ],\n",
+ "    \"polygon\": [\n",
+ "        {\n",
+ "            \"x\": 1489.581,\n",
+ "            \"y\": 183.934\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 2278.306,\n",
+ "            \"y\": 256.885\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 2428.197,\n",
+ "            \"y\": 200.437\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 2560.0,\n",
+ "            \"y\": 335.419\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 2557.386,\n",
+ "            \"y\": 503.165\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 2320.596,\n",
+ "            \"y\": 503.103\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 2156.083,\n",
+ "            \"y\": 628.943\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 2161.111,\n",
+ "            \"y\": 785.519\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 2002.115,\n",
+ "            \"y\": 894.647\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 1838.456,\n",
+ "            \"y\": 877.874\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 1436.53,\n",
+ "            \"y\": 874.636\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 1411.403,\n",
+ "            \"y\": 758.579\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 1353.853,\n",
+ "            \"y\": 751.74\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 1345.264,\n",
+ "            \"y\": 453.461\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 1426.011,\n",
+ "            \"y\": 421.129\n",
+ "        },\n",
+ "        {\n",
+ "            \"x\": 1489.581,\n",
+ "            \"y\": 183.934\n",
+ "        },\n",
+ "    ],\n",
+ "}"
+ ]
 },
 {
+ "cell_type": "markdown",
 "metadata": {},
 "source": [
 "### Classification: Free-form text"
- ],
- "cell_type": "markdown"
+ ]
 },
 {
- "metadata": {},
- "source": "# Python 
annotation\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\",\n value=lb_types.Text(\n answer=\"sample text\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"confidence\": 0.5,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python annotation\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\",\n", + " value=lb_types.Text(\n", + " answer=\"sample text\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "text_annotation_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"confidence\": 0.5,\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Point" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Python Annotation\npoint_prediction = lb_types.ObjectAnnotation(\n name=\"point\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\npoint_prediction_ndjson = {\n \"name\": \"point\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n 
\"value\": 0.334332\n },\n ],\n \"classifications\": [],\n \"point\": {\n \"x\": 1166.606,\n \"y\": 1441.768\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Python Annotation\n", + "point_prediction = lb_types.ObjectAnnotation(\n", + " name=\"point\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " value=lb_types.Point(x=1166.606, y=1441.768),\n", + ")\n", + "\n", + "point_prediction_ndjson = {\n", + " \"name\": \"point\",\n", + " \"confidence\": 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"classifications\": [],\n", + " \"point\": {\n", + " \"x\": 1166.606,\n", + " \"y\": 1441.768\n", + " },\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Polyline" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "polyline_prediction = lb_types.ObjectAnnotation(\n name=\"polyline\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)\n\npolyline_prediction_ndjson = {\n \"name\":\n \"polyline\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": 
\"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [],\n \"line\": [\n {\n \"x\": 2534.353,\n \"y\": 249.471\n },\n {\n \"x\": 2429.492,\n \"y\": 182.092\n },\n {\n \"x\": 2294.322,\n \"y\": 221.962\n },\n {\n \"x\": 2224.491,\n \"y\": 180.463\n },\n {\n \"x\": 2136.123,\n \"y\": 204.716\n },\n {\n \"x\": 1712.247,\n \"y\": 173.949\n },\n {\n \"x\": 1703.838,\n \"y\": 84.438\n },\n {\n \"x\": 1579.772,\n \"y\": 82.61\n },\n {\n \"x\": 1583.442,\n \"y\": 167.552\n },\n {\n \"x\": 1478.869,\n \"y\": 164.903\n },\n {\n \"x\": 1418.941,\n \"y\": 318.149\n },\n {\n \"x\": 1243.128,\n \"y\": 400.815\n },\n {\n \"x\": 1022.067,\n \"y\": 319.007\n },\n {\n \"x\": 892.367,\n \"y\": 379.216\n },\n {\n \"x\": 670.273,\n \"y\": 364.408\n },\n {\n \"x\": 613.114,\n \"y\": 288.16\n },\n {\n \"x\": 377.559,\n \"y\": 238.251\n },\n {\n \"x\": 368.087,\n \"y\": 185.064\n },\n {\n \"x\": 246.557,\n \"y\": 167.286\n },\n {\n \"x\": 236.648,\n \"y\": 285.61\n },\n {\n \"x\": 90.929,\n \"y\": 326.412\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "polyline_prediction = lb_types.ObjectAnnotation(\n", + " name=\"polyline\",\n", + " confidence=0.5,\n", + " custom_metrics=[\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " value=lb_types.Line(points=[\n", + " lb_types.Point(x=2534.353, y=249.471),\n", + " lb_types.Point(x=2429.492, y=182.092),\n", + " lb_types.Point(x=2294.322, y=221.962),\n", + " lb_types.Point(x=2224.491, y=180.463),\n", + " lb_types.Point(x=2136.123, y=204.716),\n", + " lb_types.Point(x=1712.247, y=173.949),\n", + " lb_types.Point(x=1703.838, y=84.438),\n", + " lb_types.Point(x=1579.772, y=82.61),\n", + " lb_types.Point(x=1583.442, y=167.552),\n", + " lb_types.Point(x=1478.869, y=164.903),\n", + " lb_types.Point(x=1418.941, y=318.149),\n", + " lb_types.Point(x=1243.128, y=400.815),\n", + " lb_types.Point(x=1022.067, y=319.007),\n", + " lb_types.Point(x=892.367, y=379.216),\n", + " lb_types.Point(x=670.273, y=364.408),\n", + " lb_types.Point(x=613.114, y=288.16),\n", + " lb_types.Point(x=377.559, y=238.251),\n", + " lb_types.Point(x=368.087, y=185.064),\n", + " lb_types.Point(x=246.557, y=167.286),\n", + " lb_types.Point(x=236.648, y=285.61),\n", + " lb_types.Point(x=90.929, y=326.412),\n", + " ]),\n", + ")\n", + "\n", + "polyline_prediction_ndjson = {\n", + " \"name\":\n", + " \"polyline\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"customMetrics\": [\n", + " {\n", + " \"name\": \"iou\",\n", + " \"value\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"f1\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"precision\",\n", + " \"value\": 0.55\n", + " },\n", + " {\n", + " \"name\": \"recall\",\n", + " \"value\": 0.33\n", + " },\n", + " {\n", + " \"name\": \"tagsCount\",\n", + " \"value\": 43\n", + " },\n", + " {\n", + " \"name\": \"metric_with_a_very_long_name\",\n", + " \"value\": 0.334332\n", + " },\n", + " ],\n", + " \"classifications\": [],\n", + " \"line\": [\n", + " {\n", + " \"x\": 2534.353,\n", + " \"y\": 
249.471\n", + " },\n", + " {\n", + " \"x\": 2429.492,\n", + " \"y\": 182.092\n", + " },\n", + " {\n", + " \"x\": 2294.322,\n", + " \"y\": 221.962\n", + " },\n", + " {\n", + " \"x\": 2224.491,\n", + " \"y\": 180.463\n", + " },\n", + " {\n", + " \"x\": 2136.123,\n", + " \"y\": 204.716\n", + " },\n", + " {\n", + " \"x\": 1712.247,\n", + " \"y\": 173.949\n", + " },\n", + " {\n", + " \"x\": 1703.838,\n", + " \"y\": 84.438\n", + " },\n", + " {\n", + " \"x\": 1579.772,\n", + " \"y\": 82.61\n", + " },\n", + " {\n", + " \"x\": 1583.442,\n", + " \"y\": 167.552\n", + " },\n", + " {\n", + " \"x\": 1478.869,\n", + " \"y\": 164.903\n", + " },\n", + " {\n", + " \"x\": 1418.941,\n", + " \"y\": 318.149\n", + " },\n", + " {\n", + " \"x\": 1243.128,\n", + " \"y\": 400.815\n", + " },\n", + " {\n", + " \"x\": 1022.067,\n", + " \"y\": 319.007\n", + " },\n", + " {\n", + " \"x\": 892.367,\n", + " \"y\": 379.216\n", + " },\n", + " {\n", + " \"x\": 670.273,\n", + " \"y\": 364.408\n", + " },\n", + " {\n", + " \"x\": 613.114,\n", + " \"y\": 288.16\n", + " },\n", + " {\n", + " \"x\": 377.559,\n", + " \"y\": 238.251\n", + " },\n", + " {\n", + " \"x\": 368.087,\n", + " \"y\": 185.064\n", + " },\n", + " {\n", + " \"x\": 246.557,\n", + " \"y\": 167.286\n", + " },\n", + " {\n", + " \"x\": 236.648,\n", + " \"y\": 285.61\n", + " },\n", + " {\n", + " \"x\": 90.929,\n", + " \"y\": 326.412\n", + " },\n", + " ],\n", + "}" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# send a sample image as batch to the project\nglobal_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\" + str(uuid.uuid4())\ntest_img_urls = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"Custom metrics demo\",\n iam_integration=None)\ntask = dataset.create_data_rows([test_img_urls])\n\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# send a sample image as batch to the project\n", + "global_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\" + str(uuid.uuid4())\n", + "test_img_urls = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"Custom metrics demo\",\n", + " iam_integration=None)\n", + "task = dataset.create_data_rows([test_img_urls])\n", + "\n", + "print(f\"Failed data rows: {task.failed_data_rows}\")\n", + "print(f\"Errors: {task.errors}\")\n", + "\n", + "if task.errors:\n", + " for error in task.errors:\n", + " if (\"Duplicate global key\" in error[\"message\"] and\n", + " dataset.row_count == 0):\n", + " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", + " print(f\"Deleting empty dataset: {dataset}\")\n", + " dataset.delete()" + ] }, { + 
"cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of tools\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Image Prediction Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\"),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " 
name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " tools=[ # List of tools\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", + " ),\n", + " ],\n", + " ),\n", + " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n", + " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n", + " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Image Prediction Import Demo\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Image,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# create Model\nmodel = client.create_model(\n name=\"model_with_aggregated_custom_metrics\" + str(uuid.uuid4()),\n ontology_id=ontology.uid,\n)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# create Model\n", + "model = client.create_model(\n", + " name=\"model_with_aggregated_custom_metrics\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid,\n", + ")\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. 
Create the predictions payload\n", @@ -275,150 +1570,376 @@ "Create the prediction payload using the snippets of code in ***Supported Predictions*** section.\n", "\n", "The resulting label_ndjson should have exactly the same content for predictions that are supported by both (with exception of the uuid strings that are generated)" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Label for predictions\nlabel_prediction = []\nlabel_prediction.append(\n lb_types.Label(\n data=lb_types.ImageData(global_key=global_key),\n annotations=[\n radio_prediction,\n nested_radio_prediction,\n checklist_prediction,\n nested_checklist_prediction,\n bbox_prediction,\n bbox_with_radio_subclass_prediction,\n polyline_prediction,\n polygon_prediction,\n point_prediction,\n text_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Label for predictions\n", + "label_prediction = []\n", + "label_prediction.append(\n", + " lb_types.Label(\n", + " data=lb_types.ImageData(global_key=global_key),\n", + " annotations=[\n", + " radio_prediction,\n", + " nested_radio_prediction,\n", + " checklist_prediction,\n", + " nested_checklist_prediction,\n", + " bbox_prediction,\n", + " bbox_with_radio_subclass_prediction,\n", + " polyline_prediction,\n", + " polygon_prediction,\n", + " point_prediction,\n", + " text_annotation,\n", + " ],\n", + " ))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_prediction_ndjson = []\n\nfor annot in [\n radio_prediction_ndjson,\n checklist_prediction_ndjson,\n bbox_prediction_ndjson,\n bbox_with_radio_subclass_prediction_ndjson,\n polygon_prediction_ndjson,\n point_prediction_ndjson,\n polyline_prediction_ndjson,\n text_annotation_ndjson,\n nested_radio_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n]:\n annot.update({\"dataRow\": {\"globalKey\": global_key}})\n label_prediction_ndjson.append(annot)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "label_prediction_ndjson = []\n", + "\n", + "for annot in [\n", + " radio_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " bbox_prediction_ndjson,\n", + " bbox_with_radio_subclass_prediction_ndjson,\n", + " polygon_prediction_ndjson,\n", + " point_prediction_ndjson,\n", + " polyline_prediction_ndjson,\n", + " text_annotation_ndjson,\n", + " nested_radio_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson,\n", + "]:\n", + " annot.update({\"dataRow\": {\"globalKey\": global_key}})\n", + " label_prediction_ndjson.append(annot)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. 
Upload the predictions payload to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for prediction uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", + " predictions=label_prediction,\n", + ")\n", + "\n", + "# Errors will appear for prediction uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to a model run\n", "To visualize both annotations and predictions in the model run we will create a project with ground truth annotations.\n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. Create a labelbox project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Labelbox project\nproject = client.create_project(name=\"image_prediction_many_kinds\",\n media_type=lb.MediaType.Image)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Labelbox project\n", + "project = client.create_project(name=\"image_prediction_many_kinds\",\n", + " media_type=lb.MediaType.Image)\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. 
Create a batch to send to the project" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_predictions_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_predictions_demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########### Annotations ###########\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n)\n\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon\",\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, 
y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point\",\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\npolyline_annotation = lb_types.ObjectAnnotation(\n name=\"polyline\",\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########### Annotations ###########\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"second_radio_answer\")),\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", + " end=lb_types.Point(x=1915,\n", + " y=1307), # x= left + width , y = top + height\n", + " ),\n", + ")\n", + "\n", + "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", + " 
name=\"bbox_with_radio_subclass\",\n", + " value=lb_types.Rectangle(\n", + " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", + " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\", confidence=0.5)),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "polygon_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polygon\",\n", + " value=lb_types.Polygon(points=[\n", + " lb_types.Point(x=1489.581, y=183.934),\n", + " lb_types.Point(x=2278.306, y=256.885),\n", + " lb_types.Point(x=2428.197, y=200.437),\n", + " lb_types.Point(x=2560.0, y=335.419),\n", + " lb_types.Point(x=2557.386, y=503.165),\n", + " lb_types.Point(x=2320.596, y=503.103),\n", + " lb_types.Point(x=2156.083, y=628.943),\n", + " lb_types.Point(x=2161.111, y=785.519),\n", + " lb_types.Point(x=2002.115, y=894.647),\n", + " lb_types.Point(x=1838.456, y=877.874),\n", + " lb_types.Point(x=1436.53, y=874.636),\n", + " lb_types.Point(x=1411.403, y=758.579),\n", + " lb_types.Point(x=1353.853, y=751.74),\n", + " lb_types.Point(x=1345.264, y=453.461),\n", + " lb_types.Point(x=1426.011, y=421.129),\n", + " ]),\n", + ")\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", + "\n", + "point_annotation = lb_types.ObjectAnnotation(\n", + " name=\"point\",\n", + " value=lb_types.Point(x=1166.606, y=1441.768),\n", + ")\n", + "\n", + "polyline_annotation = lb_types.ObjectAnnotation(\n", + " name=\"polyline\",\n", + " value=lb_types.Line(points=[\n", + " lb_types.Point(x=2534.353, y=249.471),\n", + " lb_types.Point(x=2429.492, y=182.092),\n", + " lb_types.Point(x=2294.322, y=221.962),\n", + " lb_types.Point(x=2224.491, y=180.463),\n", + " lb_types.Point(x=2136.123, y=204.716),\n", + " lb_types.Point(x=1712.247, y=173.949),\n", + " lb_types.Point(x=1703.838, y=84.438),\n", + " lb_types.Point(x=1579.772, y=82.61),\n", + " lb_types.Point(x=1583.442, y=167.552),\n", + " lb_types.Point(x=1478.869, y=164.903),\n", + " lb_types.Point(x=1418.941, y=318.149),\n", + " lb_types.Point(x=1243.128, y=400.815),\n", + " lb_types.Point(x=1022.067, y=319.007),\n", + " lb_types.Point(x=892.367, y=379.216),\n", + " lb_types.Point(x=670.273, y=364.408),\n", + " lb_types.Point(x=613.114, y=288.16),\n", + " lb_types.Point(x=377.559, y=238.251),\n", + " lb_types.Point(x=368.087, y=185.064),\n", + " lb_types.Point(x=246.557, y=167.286),\n", + " lb_types.Point(x=236.648, y=285.61),\n", + " lb_types.Point(x=90.929, y=326.412),\n", + " ]),\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\nlabel = []\nannotations = [\n radio_annotation,\n nested_radio_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n polygon_annotation,\n point_annotation,\n polyline_annotation,\n]\nlabel.append(\n lb_types.Label(data=lb_types.ImageData(global_key=global_key),\n annotations=annotations))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", + "label = []\n", + "annotations = [\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " text_annotation,\n", + " bbox_annotation,\n", + " bbox_with_radio_subclass_annotation,\n", + " polygon_annotation,\n", + " point_annotation,\n", + " polyline_annotation,\n", + "]\n", + "label.append(\n", + " lb_types.Label(data=lb_types.ImageData(global_key=global_key),\n", + " annotations=annotations))" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. Upload annotations to the project using Label Import" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"annotation_import_\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "upload_job_annotation = lb.LabelImport.create_from_objects(\n", + " client=client,\n", + " project_id=project.uid,\n", + " name=\"annotation_import_\" + str(uuid.uuid4()),\n", + " labels=label,\n", + ")\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n", + "print(\"Status of uploads: \", upload_job_annotation.statuses)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6 Send the annotations to the Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_labels(project_id=project.uid)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_labels(project_id=project.uid)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup\n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/model_experiments/model_slices.ipynb b/examples/model_experiments/model_slices.ipynb index a91709fa0..546f0aedf 
100644 --- a/examples/model_experiments/model_slices.ipynb +++ b/examples/model_experiments/model_slices.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 2, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,10 +22,10 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# Model Slices\n", @@ -37,129 +35,192 @@ "This notebook is used to go over some common Labelbox SDK methods to interact with Model Slices created through the Labelbox platform.\n", "\n", "See [Slices](https://docs.labelbox.com/docs/slices-1) for more information on modifying Model Slices." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, - "source": "%pip install labelbox", + "source": [ + "## Set up" + ] + }, + { "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q --upgrade \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\nimport uuid", "cell_type": "code", + "execution_count": 1, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "import uuid" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ - "## API Key and Client\n", - "See the developer guide for [creating an API key](https://docs.labelbox.com/reference/create-api-key)." - ], - "cell_type": "markdown" + "## API key and client\n", + "Provide a valid API key below to connect to the Labelbox client properly. For more information, please review the [Create API key](https://docs.labelbox.com/reference/create-api-key) guide." + ] }, { - "metadata": {}, - "source": "# Add your API key\nAPI_KEY = \"\"\n# To get your API key go to: Workspace settings -> API -> Create API Key\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = None\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Create Model Slice\n", "\n", "In order to interact with model slices, you must create a Model Experiment with a Model Run and then create a Model Slice through the platform. The steps below go over this process. See [Model](https://docs.labelbox.com/reference/model) from our developer guides for more information." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Creating Model Experiment\n", "\n", "To create a Model Experiment you will need to create an ontology. 
See [Ontology](https://docs.labelbox.com/reference/ontology) for more information."
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Ontology"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "classification_features = [\n    lb.Classification(\n        class_type=lb.Classification.Type.CHECKLIST,\n        name=\"Quality Issues\",\n        options=[\n            lb.Option(value=\"blurry\", label=\"Blurry\"),\n            lb.Option(value=\"distorted\", label=\"Distorted\"),\n        ],\n    )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n                                      classifications=classification_features)\n\nontology = client.create_ontology(\n    \"Ontology from new features\",\n    ontology_builder.asdict(),\n    media_type=lb.MediaType.Image,\n)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "classification_features = [\n",
+ "    lb.Classification(\n",
+ "        class_type=lb.Classification.Type.CHECKLIST,\n",
+ "        name=\"Quality Issues\",\n",
+ "        options=[\n",
+ "            lb.Option(value=\"blurry\", label=\"Blurry\"),\n",
+ "            lb.Option(value=\"distorted\", label=\"Distorted\"),\n",
+ "        ],\n",
+ "    )\n",
+ "]\n",
+ "\n",
+ "ontology_builder = lb.OntologyBuilder(tools=[],\n",
+ "                                      classifications=classification_features)\n",
+ "\n",
+ "ontology = client.create_ontology(\n",
+ "    \"Ontology from new features\",\n",
+ "    ontology_builder.asdict(),\n",
+ "    media_type=lb.MediaType.Image,\n",
+ ")"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"#### Model Experiment"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "model = client.create_model(name=\"Model Slice Demo\", ontology_id=ontology.uid)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "model = client.create_model(name=f\"Model Slice Demo {uuid.uuid4()}\",\n",
+ "                            ontology_id=ontology.uid)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"### Creating a Model Run from Model Experiment\n",
"\n",
"In this step, we will need to create a dataset to attach data rows to our model run. See [Dataset](https://docs.labelbox.com/reference/dataset) for more information."
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Dataset and Data Rows" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# send a sample image as data row for a dataset\nglobal_key = str(uuid.uuid4())\n\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"foundry-demo-dataset\")\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\n\nprint(f\"Errors: {task.errors}\")\nprint(f\"Failed data rows: {task.failed_data_rows}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# send a sample image as data row for a dataset\n", + "global_key = str(uuid.uuid4())\n", + "\n", + "test_img_url = {\n", + " \"row_data\":\n", + " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", + " \"global_key\":\n", + " global_key,\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"foundry-demo-dataset\")\n", + "task = dataset.create_data_rows([test_img_url])\n", + "task.wait_till_done()\n", + "\n", + "print(f\"Errors: {task.errors}\")\n", + "print(f\"Failed data rows: {task.failed_data_rows}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Run and Attach Data Rows" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run_name = \"Model Slice Demo\"\nexample_config = {\n \"learning_rate\": 0.001,\n \"batch_size\": 32,\n}\nmodel_run = model.create_model_run(name=model_run_name, config=example_config)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run_name = \"Model Slice Demo\"\n", + "example_config = {\n", + " \"learning_rate\": 0.001,\n", + " \"batch_size\": 32,\n", + "}\n", + "model_run = model.create_model_run(name=model_run_name, config=example_config)" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Make Model Slice Inside UI\n", @@ -175,71 +236,139 @@ "5. Give the slice a name and select ***Save***.\n", "6. Above the ***Search your data*** dropdown you will see your slice's name. Select that dropdown and click ***Copy slice ID***.\n", "7. Paste the ***Slice ID*** below." 
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "SLICE_ID = \"\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "SLICE_ID = \"\"" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Get Model Slice" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_slice = client.get_model_slice(SLICE_ID)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "model_slice = client.get_model_slice(SLICE_ID)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Obtain Data Row IDs from Model Slice" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "data_row_ids = model_slice.get_data_row_ids(model_run.uid)\n\nfor data_row_id in data_row_ids:\n print(data_row_id)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "data_row_ids = model_slice.get_data_row_ids(model_run.uid)\n", + "\n", + "for data_row_id in data_row_ids:\n", + " print(data_row_id)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Obtain Data Row Identifiers Objects" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "data_rows = model_slice.get_data_row_identifiers(model_run.uid)\n\nfor data_row in data_rows:\n print(data_row)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "data_rows = model_slice.get_data_row_identifiers(model_run.uid)\n", + "\n", + "for data_row in data_rows:\n", + " print(data_row)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Model Slice Attributes" - ], - "cell_type": "markdown" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# name (str)\n", + "model_slice.name\n", + "\n", + "# description (str)\n", + "model_slice.description\n", + "\n", + "# updated at (datetime)\n", + "model_slice.updated_at\n", + "\n", + "# created at (datetime)\n", + "model_slice.created_at\n", + "\n", + "# filter (list[dict])\n", + "model_slice.filter" + ] }, { + "cell_type": "markdown", "metadata": {}, - "source": "# name (str)\nmodel_slice.name\n\n# description (str)\nmodel_slice.description\n\n# updated at (datetime)\nmodel_slice.updated_at\n\n# created at (datetime)\nmodel_slice.created_at\n\n# filter (list[dict])\nmodel_slice.filter", + "source": [ + "## Clean up\n", + "Uncomment and run the cell below to optionally delete Labelbox objects created." 
+ ] + }, + { "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# model_run.delete()\n", + "# model.delete()\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/prediction_upload/pdf_predictions.ipynb b/examples/prediction_upload/pdf_predictions.ipynb index 942d40e9e..962e35704 100644 --- a/examples/prediction_upload/pdf_predictions.ipynb +++ b/examples/prediction_upload/pdf_predictions.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 2, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,17 +22,17 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Prediction Import " - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "*Annotation types*\n", @@ -53,115 +51,419 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"" + ] }, { - "metadata": {}, - "source": "import uuid\nimport json\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import uuid\n", + "import json\n", + "import requests\n", + "import labelbox as lb\n", + "import labelbox.types as lb_types" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API key" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "API_KEY = \"\"\n", + "client = lb.Client(API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "########## Entity ##########\n\n# Annotation Types\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n confidence=0.5,\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_prediction_ndjson = {\n \"name\":\n \"named_entity\",\n \"confidence\":\n 0.5,\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n \"groupId\": \"\",\n \"page\": 1,\n }],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########## Entity 
##########\n", + "\n", + "# Annotation Types\n", + "entities_prediction = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " confidence=0.5,\n", + " value=lb_types.DocumentEntity(\n", + " name=\"named_entity\",\n", + " textSelections=[\n", + " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", + " ],\n", + " ),\n", + ")\n", + "\n", + "# NDJSON\n", + "entities_prediction_ndjson = {\n", + " \"name\":\n", + " \"named_entity\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"textSelections\": [{\n", + " \"tokenIds\": [\"\",],\n", + " \"groupId\": \"\",\n", + " \"page\": 1,\n", + " }],\n", + "}" + ] }, { - "metadata": {}, - "source": "########### Radio Classification #########\n\n# Annotation types\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n)\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "########### Radio Classification #########\n", + "\n", + "# Annotation types\n", + "radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\", confidence=0.5)),\n", + ")\n", + "# NDJSON\n", + "radio_prediction_ndjson = {\n", + " \"name\": \"radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Checklist Classification ###########\n", + "\n", + "# Annotation types\n", + "checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", + " confidence=0.5),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", + " confidence=0.5),\n", + " ]),\n", + ")\n", + "\n", + "# NDJSON\n", + "checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"checklist_question\",\n", + " \"answer\": [\n", + " {\n", + " \"name\": \"first_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " {\n", + " \"name\": \"second_checklist_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " ],\n", + "}" + ] }, { - "metadata": {}, - "source": "############ Bounding Box ###########\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your 
ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim_1[\"left\"],\n y=bbox_dim_1[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n ), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": bbox_dim_1,\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############ Bounding Box ###########\n", + "\n", + "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", + "bbox_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\", # must match your ontology feature\"s name\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", + " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", + " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " page=0,\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " ),\n", + ")\n", + "\n", + "bbox_prediction_ndjson = {\n", + " \"name\": \"bounding_box\",\n", + " \"bbox\": bbox_dim_1,\n", + " \"page\": 0,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "# ############ global nested classifications ###########\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n },\n }],\n }],\n}\n\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": 
null + "source": [ + "# ############ global nested classifications ###########\n", + "\n", + "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",\n", + " confidence=\n", + " 0.5, # Confidence scores should be added to the answer\n", + " )\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\":\n", + " \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\":\n", + " \"first_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " \"classifications\": [{\n", + " \"name\": \"sub_checklist_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_checklist_answer\",\n", + " \"confidence\":\n", + " 0.5, # Confidence scores should be added to the answer\n", + " },\n", + " }],\n", + " }],\n", + "}\n", + "\n", + "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " confidence=0.5, # Confidence scores should be added to the answer\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=\n", + " 0.5, # Confidence scores should be added to the answer\n", + " )),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "nested_radio_prediction_ndjson = {\n", + " \"name\": \"nested_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"first_sub_radio_answer\",\n", + " \"confidence\": 0.5\n", + " },\n", + " }],\n", + " },\n", + "}" + ] }, { - "metadata": {}, - "source": "############## Classification Free-form text ##############\n\ntext_prediction = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n)\n\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "############## Classification Free-form text ##############\n", + "\n", + "text_prediction = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", # must match your ontology feature\"s name\n", + " value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n", + ")\n", + "\n", + "text_prediction_ndjson = {\n", + " \"name\": \"free_text\",\n", + " \"answer\": \"sample text\",\n", + " \"confidence\": 0.5,\n", + "}" + ] }, { - "metadata": {}, - "source": "######### BBOX with nested classifications #########\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 
249.386,\n}\n\nbbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\",\n confidence=0.5,\n )),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\",\n \"confidence\": 0.5,\n },\n }],\n },\n }],\n \"bbox\": bbox_dim,\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "######### BBOX with nested classifications #########\n", + "\n", + "bbox_dim = {\n", + " \"top\": 226.757,\n", + " \"left\": 317.271,\n", + " \"height\": 194.229,\n", + " \"width\": 249.386,\n", + "}\n", + "\n", + "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " confidence=0.5,\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim[\"left\"],\n", + " y=bbox_dim[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n", + " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " confidence=0.5,\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"second_sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"second_sub_radio_answer\",\n", + " confidence=0.5,\n", + " )),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "bbox_with_radio_subclass_prediction_ndjson = {\n", + " \"name\": \"bbox_with_radio_subclass\",\n", + " \"classifications\": [{\n", + " \"name\": \"sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\":\n", + " \"first_sub_radio_answer\",\n", + " \"confidence\":\n", + " 0.5,\n", + " \"classifications\": [{\n", + " \"name\": \"second_sub_radio_question\",\n", + " \"answer\": {\n", + " \"name\": \"second_sub_radio_answer\",\n", + " \"confidence\": 0.5,\n", + " },\n", + " }],\n", + " },\n", + " }],\n", + " \"bbox\": bbox_dim,\n", + " \"page\": 1,\n", + " \"unit\": \"POINTS\",\n", + "}" + ] }, { - "metadata": {}, - "source": "############ NER with nested classifications 
########\n\nner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n    name=\"ner_with_checklist_subclass\",\n    confidence=0.5,\n    value=lb_types.DocumentEntity(\n        name=\"ner_with_checklist_subclass\",\n        text_selections=[\n            lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n        ],\n    ),\n    classifications=[\n        lb_types.ClassificationAnnotation(\n            name=\"sub_checklist_question\",\n            value=lb_types.Checklist(answer=[\n                lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n                                              confidence=0.5)\n            ]),\n        )\n    ],\n)\n\nner_with_checklist_subclass_prediction_ndjson = {\n    \"name\":\n        \"ner_with_checklist_subclass\",\n    \"classifications\": [{\n        \"name\": \"sub_checklist_question\",\n        \"answer\": [{\n            \"name\": \"first_sub_checklist_answer\",\n            \"confidence\": 0.5\n        }],\n    }],\n    \"textSelections\": [{\n        \"tokenIds\": [\"\"],\n        \"groupId\": \"\",\n        \"page\": 1\n    }],\n}",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "############ NER with nested classifications ########\n",
+ "\n",
+ "ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n",
+ "    name=\"ner_with_checklist_subclass\",\n",
+ "    confidence=0.5,\n",
+ "    value=lb_types.DocumentEntity(\n",
+ "        name=\"ner_with_checklist_subclass\",\n",
+ "        text_selections=[\n",
+ "            lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n",
+ "        ],\n",
+ "    ),\n",
+ "    classifications=[\n",
+ "        lb_types.ClassificationAnnotation(\n",
+ "            name=\"sub_checklist_question\",\n",
+ "            value=lb_types.Checklist(answer=[\n",
+ "                lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n",
+ "                                              confidence=0.5)\n",
+ "            ]),\n",
+ "        )\n",
+ "    ],\n",
+ ")\n",
+ "\n",
+ "ner_with_checklist_subclass_prediction_ndjson = {\n",
+ "    \"name\":\n",
+ "        \"ner_with_checklist_subclass\",\n",
+ "    \"classifications\": [{\n",
+ "        \"name\": \"sub_checklist_question\",\n",
+ "        \"answer\": [{\n",
+ "            \"name\": \"first_sub_checklist_answer\",\n",
+ "            \"confidence\": 0.5\n",
+ "        }],\n",
+ "    }],\n",
+ "    \"textSelections\": [{\n",
+ "        \"tokenIds\": [\"\"],\n",
+ "        \"groupId\": \"\",\n",
+ "        \"page\": 1\n",
+ "    }],\n",
+ "}"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Import data rows into Catalog "
- ],
- "cell_type": "markdown"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"Passing a `text_layer_url` is no longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens.\n",
@@ -175,60 +477,200 @@
"For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. 
As a result, all tokens in the text layer are also rotated by 90 degrees.\n",
"\n",
"You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation."
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "global_key = \"0801.3483.pdf\" + str(uuid.uuid4())\nimg_url = {\n    \"row_data\": {\n        \"pdf_url\":\n            \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n    },\n    \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n    for error in task.errors:\n        if (\"Duplicate global key\" in error[\"message\"] and\n                dataset.row_count == 0):\n            # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n            print(f\"Deleting empty dataset: {dataset}\")\n            dataset.delete()",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "global_key = \"0801.3483.pdf\" + str(uuid.uuid4())\n",
+ "img_url = {\n",
+ "    \"row_data\": {\n",
+ "        \"pdf_url\":\n",
+ "            \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n",
+ "    },\n",
+ "    \"global_key\": global_key,\n",
+ "}\n",
+ "\n",
+ "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n",
+ "task = dataset.create_data_rows([img_url])\n",
+ "task.wait_till_done()\n",
+ "print(f\"Failed data rows: {task.failed_data_rows}\")\n",
+ "print(f\"Errors: {task.errors}\")\n",
+ "\n",
+ "if task.errors:\n",
+ "    for error in task.errors:\n",
+ "        if (\"Duplicate global key\" in error[\"message\"] and\n",
+ "                dataset.row_count == 0):\n",
+ "            # If the global key already exists in the workspace, the dataset will be created empty, so we can delete it.\n",
+ "            print(f\"Deleting empty dataset: {dataset}\")\n",
+ "            dataset.delete()"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Create/select an Ontology for your model predictions\n",
"Your project should have the correct ontology set up with all the tools and classifications needed for your annotations. The tool names and classification instructions must match the `name`/`instructions` fields in your annotations to ensure the correct feature schemas are matched, as in the sketch below.\n",
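"\n",
"As a minimal illustration of this pairing (placeholder coordinates; the full ontology is created in the cell after next), the annotation `name` must match the tool name exactly:\n",
"\n",
"```python\n",
"tool = lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\")\n",
"prediction = lb_types.ObjectAnnotation(\n",
"    name=\"bounding_box\",  # must match the tool name above\n",
"    value=lb_types.DocumentRectangle(\n",
"        start=lb_types.Point(x=0, y=0),\n",
"        end=lb_types.Point(x=10, y=10),\n",
"        page=0,\n",
"        unit=lb_types.RectangleUnit.POINTS,\n",
"    ),\n",
")\n",
"```"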
- ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "## Setup the ontology and link the tools created above.\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(value=\"first_radio_answer\"),\n", + " lb.Option(value=\"second_radio_answer\"),\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(value=\"first_checklist_answer\"),\n", + " lb.Option(value=\"second_checklist_answer\"),\n", + " ],\n", + " ),\n", + " 
lb.Classification(\n", + " class_type=lb.Classification.Type.TEXT,\n", + " name=\"free_text\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"nested_radio_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[lb.Option(\"first_sub_radio_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"nested_checklist_question\",\n", + " scope=lb.Classification.Scope.GLOBAL,\n", + " options=[\n", + " lb.Option(\n", + " \"first_checklist_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + " tools=[ # List of Tool objects\n", + " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", + " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.NER,\n", + " name=\"ner_with_checklist_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"sub_checklist_question\",\n", + " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", + " )\n", + " ],\n", + " ),\n", + " lb.Tool(\n", + " tool=lb.Tool.Type.BBOX,\n", + " name=\"bbox_with_radio_subclass\",\n", + " classifications=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"sub_radio_question\",\n", + " options=[\n", + " lb.Option(\n", + " value=\"first_sub_radio_answer\",\n", + " options=[\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.RADIO,\n", + " name=\"second_sub_radio_question\",\n", + " options=[\n", + " lb.Option(\"second_sub_radio_answer\")\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " )\n", + " ],\n", + " ),\n", + " ],\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Document Annotation Import Demo\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Document,\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# create Model\nmodel = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# create Model\n", + "model = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n", + " ontology_id=ontology.uid)\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "model_run.upsert_data_rows(global_keys=[global_key])", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + 
"model_run.upsert_data_rows(global_keys=[global_key])" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the predictions payload\n", @@ -237,184 +679,508 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting payload should have exactly the same content for annotations that are supported by both" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "To extract the generated text layer url we first need to export the data row" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "client.enable_experimental = True\ntask = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_buffered_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = json.loads(output.json)\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "client.enable_experimental = True\n", + "task = lb.DataRow.export(client=client, global_keys=[global_key])\n", + "task.wait_till_done()\n", + "stream = task.get_buffered_stream()\n", + "\n", + "text_layer = \"\"\n", + "for output in stream:\n", + " output_json = output.json\n", + " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n", + "print(text_layer)" + ] }, { - "metadata": {}, - "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n \"tokenIds\": list_tokens,\n \"page\": page\n }]\n })\n\n\n# Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n \"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n\n# re-write the entity annotation with text selections\nentities_prediction_document_entity = lb_types.DocumentEntity(\n name=\"named_entity\", confidence=0.5, textSelections=text_selections)\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\", value=entities_prediction_document_entity)\n\n# re-write the entity annotation + subclassification with text selections\nclassifications = [\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n confidence=0.5)\n ]),\n )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n textSelections=text_selections_ner,\n)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n value=ner_annotation_with_subclass,\n classifications=classifications,\n)\n\n# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\nprint(f\"entities_annotation={entities_prediction}\")\nprint(\n f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Helper method\n", + "def update_text_selections(annotation, group_id, list_tokens, page):\n", + " return annotation.update({\n", + " \"textSelections\": [{\n", + " \"groupId\": group_id,\n", + " \"tokenIds\": list_tokens,\n", + " \"page\": page\n", + " }]\n", + " })\n", + "\n", + "\n", + "# Fetch the content of the text layer\n", + "res = requests.get(text_layer)\n", + "\n", + "# Phrases that we want to annotation obtained from the text layer url\n", + "content_phrases = [\n", + " \"Metal-insulator (MI) transitions have been one of the\",\n", + " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", + "]\n", + "\n", + "# Parse the text layer\n", + "text_selections = []\n", + "text_selections_ner = []\n", + "\n", + "for obj in json.loads(res.text):\n", + " for group in obj[\"groups\"]:\n", + " if group[\"content\"] == content_phrases[0]:\n", + " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " document_text_selection = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", + " text_selections.append(document_text_selection)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=entities_prediction_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " list_tokens, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + " if group[\"content\"] == content_phrases[1]:\n", + " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", + " # build text selections for Python Annotation Types\n", + " ner_text_selection = lb_types.DocumentTextSelection(\n", + " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", + " text_selections_ner.append(ner_text_selection)\n", + " # build text selection for the NDJson annotations\n", + " update_text_selections(\n", + " annotation=ner_with_checklist_subclass_prediction_ndjson,\n", + " group_id=group[\"id\"], # id representing group of words\n", + " list_tokens=\n", + " list_tokens_2, # ids representing individual words from the group\n", + " page=1,\n", + " )\n", + "\n", + "# re-write the entity annotation with text selections\n", + "entities_prediction_document_entity = lb_types.DocumentEntity(\n", + " name=\"named_entity\", confidence=0.5, textSelections=text_selections)\n", + "entities_prediction = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\", value=entities_prediction_document_entity)\n", + "\n", + "# re-write the entity annotation + subclassification with text selections\n", + "classifications = [\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n", + " confidence=0.5)\n", + " ]),\n", + " )\n", + "]\n", + "ner_annotation_with_subclass = lb_types.DocumentEntity(\n", + " name=\"ner_with_checklist_subclass\",\n", + " confidence=0.5,\n", + " textSelections=text_selections_ner,\n", + ")\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " confidence=0.5,\n", + " value=ner_annotation_with_subclass,\n", + " classifications=classifications,\n", + ")\n", + "\n", + "# Final NDJSON and python annotations\n", + "print(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\n", + "print(f\"entities_annotation={entities_prediction}\")\n", + "print(\n", + " f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n", + ")\n", + "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "Python annotation \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "label_predictions = []\n\nlabel_predictions.append(\n lb_types.Label(\n data=lb_types.DocumentData(global_key=global_key),\n annotations=[\n entities_prediction,\n checklist_prediction,\n nested_checklist_prediction,\n text_prediction,\n radio_prediction,\n 
nested_radio_prediction,\n            bbox_prediction,\n            bbox_with_radio_subclass_prediction,\n            ner_with_checklist_subclass_prediction,\n        ],\n    ))",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "label_predictions = []\n",
+ "\n",
+ "label_predictions.append(\n",
+ "    lb_types.Label(\n",
+ "        data=lb_types.DocumentData(global_key=global_key),\n",
+ "        annotations=[\n",
+ "            entities_prediction,\n",
+ "            checklist_prediction,\n",
+ "            nested_checklist_prediction,\n",
+ "            text_prediction,\n",
+ "            radio_prediction,\n",
+ "            nested_radio_prediction,\n",
+ "            bbox_prediction,\n",
+ "            bbox_with_radio_subclass_prediction,\n",
+ "            ner_with_checklist_subclass_prediction,\n",
+ "        ],\n",
+ "    ))"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"If using NDJSON: "
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "label_predictions_ndjson = []\nfor annot in [\n    entities_prediction_ndjson,\n    checklist_prediction_ndjson,\n    nested_checklist_prediction_ndjson,\n    text_prediction_ndjson,\n    radio_prediction_ndjson,\n    nested_radio_prediction_ndjson,\n    bbox_prediction_ndjson,\n    bbox_with_radio_subclass_prediction_ndjson,\n    ner_with_checklist_subclass_prediction_ndjson,\n]:\n    annot.update({\n        \"dataRow\": {\n            \"globalKey\": global_key\n        },\n    })\n    label_predictions_ndjson.append(annot)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "label_predictions_ndjson = []\n",
+ "for annot in [\n",
+ "    entities_prediction_ndjson,\n",
+ "    checklist_prediction_ndjson,\n",
+ "    nested_checklist_prediction_ndjson,\n",
+ "    text_prediction_ndjson,\n",
+ "    radio_prediction_ndjson,\n",
+ "    nested_radio_prediction_ndjson,\n",
+ "    bbox_prediction_ndjson,\n",
+ "    bbox_with_radio_subclass_prediction_ndjson,\n",
+ "    ner_with_checklist_subclass_prediction_ndjson,\n",
+ "]:\n",
+ "    annot.update({\n",
+ "        \"dataRow\": {\n",
+ "            \"globalKey\": global_key\n",
+ "        },\n",
+ "    })\n",
+ "    label_predictions_ndjson.append(annot)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 6: Upload the predictions payload to the Model Run"
- ],
- "cell_type": "markdown"
+ ]
},
{
- "metadata": {},
- "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n    name=\"prediction_upload_job\" + str(uuid.uuid4()),\n    predictions=label_predictions,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)",
"cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
- "execution_count": null
+ "source": [
+ "# Upload the prediction label to the Model Run\n",
+ "upload_job_prediction = model_run.add_predictions(\n",
+ "    name=\"prediction_upload_job\" + str(uuid.uuid4()),\n",
+ "    predictions=label_predictions,\n",
+ ")\n",
+ "\n",
+ "# Errors will appear for annotation uploads that failed.\n",
+ "print(\"Errors:\", upload_job_prediction.errors)\n",
+ "print(\"Status of uploads: \", upload_job_prediction.statuses)"
+ ]
},
{
+ "cell_type": "markdown",
"metadata": {},
"source": [
"## Step 7: Send annotations to the Model Run\n",
"To send annotations to a Model Run, we must first import them into a project, create a label payload, and then send them to the Model Run, as sketched below.\n",
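"\n",
"A minimal sketch of the flow in steps 7.1–7.6, assuming `labels` is the label list built in steps 7.3–7.4 (names shortened to \"...\" here; the cells below use the real ones):\n",
"\n",
"```python\n",
"project = client.create_project(name=\"...\", media_type=lb.MediaType.Document)  # 7.1\n",
"project.setup_editor(ontology)  # same ontology as the Model Run\n",
"project.create_batch(\"...\", global_keys=[global_key])  # 7.2\n",
"import_job = lb.LabelImport.create_from_objects(\n",
"    client=client, project_id=project.uid, name=\"...\", labels=labels)  # 7.5\n",
"import_job.wait_until_done()\n",
"model_run.upsert_labels(project_id=project.uid)  # 7.6\n",
"```"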
- ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.1 Create a labelbox project \n" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project = client.create_project(name=\"Document Prediction Import Demo\",\n media_type=lb.MediaType.Document)\nproject.setup_editor(ontology)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project = client.create_project(name=\"Document Prediction Import Demo\",\n", + " media_type=lb.MediaType.Document)\n", + "project.setup_editor(ontology)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.2 Create a batch to send to the project " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "project.create_batch(\n \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "project.create_batch(\n", + " \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n", + " global_keys=[\n", + " global_key\n", + " ], # Paginated collection of data row objects, list of data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest)\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.3 Create the annotations payload" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "entities_annotation = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(name=\"named_entity\",\n textSelections=text_selections),\n)\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim_1[\"left\"],\n y=bbox_dim_1[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n ), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",)\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n 
lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",)),\n )\n ],\n )),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n}\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n text_selections=text_selections_ner),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "entities_annotation = lb_types.ObjectAnnotation(\n", + " name=\"named_entity\",\n", + " value=lb_types.DocumentEntity(name=\"named_entity\",\n", + " textSelections=text_selections),\n", + ")\n", + "\n", + "radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\")),\n", + ")\n", + "\n", + "checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", + " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", + " ]),\n", + ")\n", + "\n", + "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", + "bbox_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bounding_box\", # must match your ontology feature\"s name\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", + " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", + " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " page=0,\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " ),\n", + ")\n", + "\n", + "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_checklist_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " 
value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(\n", + " name=\"first_sub_checklist_answer\",)\n", + " ]),\n", + " )\n", + " ],\n", + " )\n", + " ]),\n", + ")\n", + "\n", + "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"nested_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",)),\n", + " )\n", + " ],\n", + " )),\n", + ")\n", + "\n", + "text_annotation = lb_types.ClassificationAnnotation(\n", + " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", + "\n", + "bbox_dim = {\n", + " \"top\": 226.757,\n", + " \"left\": 317.271,\n", + " \"height\": 194.229,\n", + " \"width\": 249.386,\n", + "}\n", + "\n", + "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"bbox_with_radio_subclass\",\n", + " value=lb_types.DocumentRectangle(\n", + " start=lb_types.Point(x=bbox_dim[\"left\"],\n", + " y=bbox_dim[\"top\"]), # x = left, y = top\n", + " end=lb_types.Point(\n", + " x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n", + " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", + " ), # x= left + width , y = top + height\n", + " unit=lb_types.RectangleUnit.POINTS,\n", + " page=1,\n", + " ),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_radio_question\",\n", + " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", + " name=\"first_sub_radio_answer\",\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"second_sub_radio_question\",\n", + " value=lb_types.Radio(\n", + " answer=lb_types.ClassificationAnswer(\n", + " name=\"second_sub_radio_answer\")),\n", + " )\n", + " ],\n", + " )),\n", + " )\n", + " ],\n", + ")\n", + "\n", + "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", + " name=\"ner_with_checklist_subclass\",\n", + " value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n", + " text_selections=text_selections_ner),\n", + " classifications=[\n", + " lb_types.ClassificationAnnotation(\n", + " name=\"sub_checklist_question\",\n", + " value=lb_types.Checklist(answer=[\n", + " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", + " ]),\n", + " )\n", + " ],\n", + ")" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "7.4 Create the label object " - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "labels = []\n\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n radio_annotation,\n nested_radio_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n ner_with_checklist_subclass_annotation,\n ],\n ))", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "labels = []\n", + "\n", + "labels.append(\n", + " lb_types.Label(\n", + " data={\"global_key\": global_key},\n", + " annotations=[\n", + " entities_annotation,\n", + " checklist_annotation,\n", + " nested_checklist_annotation,\n", + " text_annotation,\n", + " radio_annotation,\n", + " nested_radio_annotation,\n", + " bbox_annotation,\n", + " bbox_with_radio_subclass_annotation,\n", + " 
ner_with_checklist_subclass_annotation,\n",
+ "        ],\n",
+ "    ))"
+ ]
 },
 {
+ "cell_type": "markdown",
 "metadata": {},
 "source": [
 "7.5 Upload annotations to the project using Label import\n"
- ],
- "cell_type": "markdown"
+ ]
 },
 {
- "metadata": {},
- "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"text_label_import_job\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)",
 "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
 "outputs": [],
- "execution_count": null
+ "source": [
+ "upload_job_annotation = lb.LabelImport.create_from_objects(\n",
+ "    client=client,\n",
+ "    project_id=project.uid,\n",
+ "    name=\"text_label_import_job\" + str(uuid.uuid4()),\n",
+ "    labels=labels,\n",
+ ")\n",
+ "\n",
+ "upload_job_annotation.wait_until_done()\n",
+ "# Errors will appear for annotation uploads that failed.\n",
+ "print(\"Errors:\", upload_job_annotation.errors)\n",
+ "print(\"Status of uploads: \", upload_job_annotation.statuses)"
+ ]
 },
 {
+ "cell_type": "markdown",
 "metadata": {},
 "source": [
 "7.6 Send the annotations to the Model Run "
- ],
- "cell_type": "markdown"
+ ]
 },
 {
- "metadata": {},
- "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)",
 "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
 "outputs": [],
- "execution_count": null
+ "source": [
+ "# Get the label IDs from the project and add them to the model run\n",
+ "model_run.upsert_labels(project_id=project.uid)"
+ ]
 },
 {
+ "cell_type": "markdown",
 "metadata": {},
 "source": [
 "## Optional deletions for cleanup"
- ],
- "cell_type": "markdown"
+ ]
 },
 {
- "metadata": {},
- "source": "# project.delete()\n# dataset.delete()",
 "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
 "outputs": [],
- "execution_count": null
+ "source": [
+ "# project.delete()\n",
+ "# dataset.delete()"
+ ]
 }
- ]
-}
\ No newline at end of file
+ ],
+ "metadata": {
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/project_configuration/queue_management.ipynb b/examples/project_configuration/queue_management.ipynb
index 0b62ea9d3..a5a450764 100644
--- a/examples/project_configuration/queue_management.ipynb
+++ b/examples/project_configuration/queue_management.ipynb
@@ -1,206 +1,381 @@
 {
- "nbformat": 4,
- "nbformat_minor": 5,
- "metadata": {},
- "cells": [
- {
- "metadata": {},
- "source": [
- "",
- " ",
- "\n"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- ""
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "# Queue Management"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "* The queue is used to task labelers with specific assets\n",
- "* We can do any of the following:\n",
- "    * Set quality settings\n",
- "    * Set the order of items in the queue\n",
- "    * Set the percent of assets to review"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": "%pip install -q \"labelbox[data]\"\n%pip install -q numpy",
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": "import labelbox as lb\nfrom labelbox.schema.quality_mode import QualityMode\nfrom uuid import uuid4\nimport json",
-
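The two cells above run the ground-truth import and then push the project's labels into the model run. A minimal sketch of gating the second step on a clean import, using only calls shown in this notebook (the helper name is hypothetical):

```python
import labelbox as lb

def import_then_upsert(client, project, model_run, labels, job_name):
    # Run the ground-truth label import and block until it finishes.
    job = lb.LabelImport.create_from_objects(
        client=client, project_id=project.uid, name=job_name, labels=labels)
    job.wait_until_done()
    # Fail fast instead of upserting a partial set of labels.
    if job.errors:
        raise RuntimeError(f"Label import failed: {job.errors}")
    model_run.upsert_labels(project_id=project.uid)
```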
"cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# API Key and Client\n", - "See the developer guide for [creating an API key](https://docs.labelbox.com/reference/create-api-key)." - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Add your API key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "### Set up demo project" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": [ - "#### Create project" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Create Labelbox project\n\nproject = client.create_project(\n name=\"batch-test-project\",\n description=\"a description\",\n quality_mode=QualityMode.\n Benchmark, # For Consensus projects use quality_mode = QualityMode.Consensus\n media_type=lb.MediaType.Image,\n)\n\ndataset = client.create_dataset(name=\"queue_dataset\")", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "#### Create ontology and attach to project" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "classification_features = [\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"Quality Issues\",\n options=[\n lb.Option(value=\"blurry\", label=\"Blurry\"),\n lb.Option(value=\"distorted\", label=\"Distorted\"),\n ],\n )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n classifications=classification_features)\n\nontology = client.create_ontology(\n \"Ontology from new features\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)\n\nproject.setup_editor(ontology)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Add data to your dataset" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "## Example image\nuploads = []\nglobal_keys = []\n# Generate data rows\nfor i in range(1, 5):\n global_key = str(uuid4())\n row = {\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n global_key,\n }\n global_keys.append(global_key)\n uploads.append(row)\n\ndata_rows = dataset.create_data_rows(uploads)\ndata_rows.wait_till_done()\nprint(\"Errors\", data_rows.errors)\nprint(\"Dataset status: \", data_rows.status)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Attach data to your project and set data row priority" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "######## Create batches\n\n# Create the batch\n\nbatch = project.create_batch(\n \"batch-demo\", # Each batch in a project must have a unique name\n global_keys=global_keys[\n 0:2], # A list of data rows, data row ids or global keys\n priority=\n 5, # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n)\n\nbatch2 = project.create_batch(\n \"batch-demo-2\", # Each batch in a project must have a unique name\n # Provide a slice of the data since you can't import assets with global keys that already exist in the project.\n global_keys=global_keys[\n 2:4], # A list of data rows, data row ids or global keys\n priority=\n 1, # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n)\n\nprint(\"Batch: \", batch)\nprint(\"Batch2: \", batch2)", - "cell_type": "code", - 
"outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "print(\"View the results here:\",\n f\"https://app.labelbox.com/projects/{project.uid}\")\n# Click `start labeling` to see the images in order", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "## Queue Order\n", - "- Add priority for each data row\n", - "- Update priority for each data row" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "export_task = project.export()\nexport_task.wait_till_done()", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "# Get data rows from project\ndata_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n data_row = json.loads(output.json)\n data_rows.append(lb.GlobalKey(data_row[\"data_row\"][\"global_key\"])\n ) # Convert json data row into data row identifier object\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "# Get label parameter overrides (LPOs)\nproject_lpos = project.labeling_parameter_overrides()\n\nfor lpo in project_lpos:\n print(lpo)\n print(\"Data row:\", lpo.data_row().uid)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "# Add LPOs\nlpos = []\npriority = 1\nfor data_row in data_rows:\n lpos.append((data_row, priority))\n priority += 1\n\nproject.set_labeling_parameter_overrides(lpos)\n\n# Check results\nproject_lpos = list(project.labeling_parameter_overrides())\n\nfor lpo in project_lpos:\n print(lpo)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "# Update LPOs\nglobal_keys = []\nfor data_row in data_rows:\n global_keys.append(data_row.key)\n\nproject.update_data_row_labeling_priority(data_rows=lb.GlobalKeys(global_keys),\n priority=1)\n\n# Check results\nproject_lpos = list(project.labeling_parameter_overrides())\n\nfor lpo in project_lpos:\n print(lpo)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Cleanup" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# project.delete()\n# dataset.delete()", - "cell_type": "code", - "outputs": [], - "execution_count": null - } - ] -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + " \n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Queue Management" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* The queue is used to task labelers with specific assets\n", + "* We can do any of the following:\n", + " * Set quality settings\n", + " * Set the order of items in the queue\n", + " * Set the percent of assets to review" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -q \"labelbox[data]\"\n", + "%pip install -q numpy" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import labelbox as lb\n", + "from labelbox.schema.quality_mode import QualityMode\n", + "from uuid import uuid4\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# API Key and Client\n", + "See the developer guide for [creating an API key](https://docs.labelbox.com/reference/create-api-key)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Add your API key\n", + "API_KEY = \"\"\n", + "client = lb.Client(api_key=API_KEY)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up demo project" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5920e0f", + "metadata": {}, + "outputs": [], + "source": [ + "# Create Labelbox project\n", + "\n", + "project = client.create_project(\n", + " name=\"batch-test-project\",\n", + " description=\"a description\",\n", + " quality_mode=QualityMode.Benchmark, # For Consensus projects use quality_mode = QualityMode.Consensus\n", + " media_type=lb.MediaType.Image,\n", + ")\n", + "\n", + "dataset = client.create_dataset(name=\"queue_dataset\")" + ] + }, + { + "cell_type": "markdown", + "id": "6b7db01c", + "metadata": {}, + "source": [ + "#### Create ontology and attach to project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a16c2b6a", + "metadata": {}, + "outputs": [], + "source": [ + "classification_features = [\n", + " lb.Classification(\n", + " class_type=lb.Classification.Type.CHECKLIST,\n", + " name=\"Quality Issues\",\n", + " options=[\n", + " lb.Option(value=\"blurry\", label=\"Blurry\"),\n", + " lb.Option(value=\"distorted\", label=\"Distorted\"),\n", + " ],\n", + " )\n", + "]\n", + "\n", + "ontology_builder = lb.OntologyBuilder(\n", + " tools=[], classifications=classification_features\n", + ")\n", + "\n", + "ontology = client.create_ontology(\n", + " \"Ontology from new features\",\n", + " ontology_builder.asdict(),\n", + " media_type=lb.MediaType.Image,\n", + ")\n", + "\n", + "project.setup_editor(ontology)" + ] + }, + { + "cell_type": "markdown", + "id": "12e17422", + "metadata": {}, + "source": [ + "# Add data to your dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e92e987", + "metadata": {}, + "outputs": [], + "source": [ + "## Example image\n", + "uploads = []\n", + "global_keys = []\n", + "# Generate data rows\n", + "for i in range(1, 5):\n", + " global_key = str(uuid4())\n", + " row = {\n", + " \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", + " \"global_key\": global_key,\n", + " }\n", + " global_keys.append(global_key)\n", + " uploads.append(row)\n", + "\n", + "data_rows = dataset.create_data_rows(uploads)\n", + "data_rows.wait_till_done()\n", + "print(\"Errors\", data_rows.errors)\n", + "print(\"Dataset status: \", data_rows.status)" + ] + }, + { + "cell_type": "markdown", + "id": "ab98d095", + "metadata": {}, + "source": [ + "# Attach data to your project and set data row priority" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "004955b1", + "metadata": {}, + "outputs": [], + "source": [ + "######## Create batches\n", + "\n", + "# Create the batch\n", + "\n", + "batch = project.create_batch(\n", + " \"batch-demo\", # Each batch in a 
project must have a unique name\n", + " global_keys=global_keys[\n", + " 0:2\n", + " ], # A list of data rows, data row ids or global keys\n", + " priority=5, # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n", + ")\n", + "\n", + "batch2 = project.create_batch(\n", + " \"batch-demo-2\", # Each batch in a project must have a unique name\n", + " # Provide a slice of the data since you can't import assets with global keys that already exist in the project.\n", + " global_keys=global_keys[\n", + " 2:4\n", + " ], # A list of data rows, data row ids or global keys\n", + " priority=1, # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n", + ")\n", + "\n", + "print(\"Batch: \", batch)\n", + "print(\"Batch2: \", batch2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a980733", + "metadata": {}, + "outputs": [], + "source": [ + "print(\n", + " \"View the results here:\", f\"https://app.labelbox.com/projects/{project.uid}\"\n", + ")\n", + "# Click `start labeling` to see the images in order" + ] + }, + { + "cell_type": "markdown", + "id": "ee8ef753", + "metadata": {}, + "source": [ + "## Queue Order\n", + "- Add priority for each data row\n", + "- Update priority for each data row" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8deb361a", + "metadata": {}, + "outputs": [], + "source": [ + "export_task = project.export()\n", + "export_task.wait_till_done()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6c48e8f", + "metadata": {}, + "outputs": [], + "source": [ + "# Get data rows from project\n", + "data_rows = []\n", + "\n", + "\n", + "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", + " data_row = output.json\n", + " data_rows.append(\n", + " lb.GlobalKey(data_row[\"data_row\"][\"global_key\"])\n", + " ) # Convert json data row into data row identifier object\n", + "\n", + "\n", + "if export_task.has_errors():\n", + " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", + " stream_handler=lambda error: print(error)\n", + " )\n", + "\n", + "if export_task.has_result():\n", + " export_json = export_task.get_buffered_stream(\n", + " stream_type=lb.StreamType.RESULT\n", + " ).start(stream_handler=json_stream_handler)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7fde932", + "metadata": {}, + "outputs": [], + "source": [ + "# Get label parameter overrides (LPOs)\n", + "project_lpos = project.labeling_parameter_overrides()\n", + "\n", + "for lpo in project_lpos:\n", + " print(lpo)\n", + " print(\"Data row:\", lpo.data_row().uid)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e7cb187", + "metadata": {}, + "outputs": [], + "source": [ + "# Add LPOs\n", + "lpos = []\n", + "priority = 1\n", + "for data_row in data_rows:\n", + " lpos.append((data_row, priority))\n", + " priority += 1\n", + "\n", + "project.set_labeling_parameter_overrides(lpos)\n", + "\n", + "# Check results\n", + "project_lpos = list(project.labeling_parameter_overrides())\n", + "\n", + "for lpo in project_lpos:\n", + " print(lpo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a4bed6b", + "metadata": {}, + "outputs": [], + "source": [ + "# Update LPOs\n", + "global_keys = []\n", + "for data_row in data_rows:\n", + " global_keys.append(data_row.key)\n", + "\n", + "project.update_data_row_labeling_priority(\n", + " data_rows=lb.GlobalKeys(global_keys), priority=1\n", + ")\n", + "\n", + 
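The same `update_data_row_labeling_priority` call can also target a subset of rows rather than the whole project. A hedged sketch reusing the `lb.GlobalKeys` wrapper from the cells above (the slice is illustrative):

```python
# Bump only the first two data rows to the front of the queue.
project.update_data_row_labeling_priority(
    data_rows=lb.GlobalKeys(global_keys[0:2]), priority=1
)
```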
"# Check results\n", + "project_lpos = list(project.labeling_parameter_overrides())\n", + "\n", + "for lpo in project_lpos:\n", + " print(lpo)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cleanup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From bfc433208c4bb047b85b00d7d536a121d331cfb0 Mon Sep 17 00:00:00 2001 From: Gabefire <33893811+Gabefire@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:25:20 -0500 Subject: [PATCH 3/3] removed metadata --- examples/annotation_import/pdf.ipynb | 1030 ++-------- examples/basics/batches.ipynb | 809 +++----- examples/basics/custom_embeddings.ipynb | 301 +-- examples/basics/data_rows.ipynb | 891 +++----- examples/basics/projects.ipynb | 469 ++--- examples/exports/composite_mask_export.ipynb | 269 +-- .../custom_metrics_demo.ipynb | 1810 ++--------------- examples/model_experiments/model_slices.ipynb | 277 +-- .../prediction_upload/pdf_predictions.ipynb | 1046 ++-------- .../queue_management.ipynb | 585 ++---- 10 files changed, 1613 insertions(+), 5874 deletions(-) diff --git a/examples/annotation_import/pdf.ipynb b/examples/annotation_import/pdf.ipynb index 526620dec..bcdd0ab69 100644 --- a/examples/annotation_import/pdf.ipynb +++ b/examples/annotation_import/pdf.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 1, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,17 +24,17 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Annotation Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -54,538 +56,137 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import uuid\nimport json\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "import json\n", - "import requests\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key\n", "Guides on https://docs.labelbox.com/docs/create-an-api-key" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Supported Annotations" - ] + ], + "cell_type": 
"markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Entity ##########\n\n# Annotation Types\nentities_annotations = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_annotations_ndjson = {\n \"name\":\n \"named_entity\",\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n \"groupId\": \"\",\n \"page\": 1,\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Entity ##########\n", - "\n", - "# Annotation Types\n", - "entities_annotations = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "# NDJSON\n", - "entities_annotations_ndjson = {\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\",],\n", - " \"groupId\": \"\",\n", - " \"page\": 1,\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########### Radio Classification #########\n\n# Annotation types\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########### Radio Classification #########\n", - "\n", - "# Annotation types\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - ")\n", - "# NDJSON\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\"\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Checklist Classification ###########\n", - "\n", - "# Annotation types\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"checklist_question\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": 
\"second_checklist_answer\"\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ Bounding Box ###########\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n end=lb_types.Point(x=518.571,\n y=245.143), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_annotation_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 135.3,\n \"left\": 102.771,\n \"height\": 109.843,\n \"width\": 415.8\n },\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Bounding Box ###########\n", - "\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\", # must match your ontology feature\"s name\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n", - " end=lb_types.Point(x=518.571,\n", - " y=245.143), # x= left + width , y = top + height\n", - " page=0,\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " ),\n", - ")\n", - "\n", - "bbox_annotation_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"bbox\": {\n", - " \"top\": 135.3,\n", - " \"left\": 102.771,\n", - " \"height\": 109.843,\n", - " \"width\": 415.8\n", - " },\n", - " \"page\": 0,\n", - " \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# ############ global nested classifications ###########\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# ############ global nested classifications ###########\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", 
- " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " },\n", - " }],\n", - " }],\n", - "}\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_annotation_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############## Classification Free-form text ##############\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\"),\n)\n\ntext_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}", + "cell_type": "code", "outputs": [], - "source": [ - "############## Classification Free-form text ##############\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature\"s name\n", - " value=lb_types.Text(answer=\"sample text\"),\n", - ")\n", - "\n", - "text_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### BBOX with nested classifications #########\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n end=lb_types.Point(x=566.657,\n y=420.986), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_annotation_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\"\n },\n }],\n },\n 
}],\n \"bbox\": {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### BBOX with nested classifications #########\n", - "\n", - "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n", - " end=lb_types.Point(x=566.657,\n", - " y=420.986), # x= left + width , y = top + height\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"second_sub_radio_question\",\n", - " value=lb_types.Radio(\n", - " answer=lb_types.ClassificationAnswer(\n", - " name=\"second_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "bbox_with_radio_subclass_annotation_ndjson = {\n", - " \"name\": \"bbox_with_radio_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"second_sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"second_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - " }],\n", - " \"bbox\": {\n", - " \"top\": 226.757,\n", - " \"left\": 317.271,\n", - " \"height\": 194.229,\n", - " \"width\": 249.386,\n", - " },\n", - " \"page\": 1,\n", - " \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ NER with nested classifications ########\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n text_selections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation_ndjson = {\n \"name\":\n \"ner_with_checklist_subclass\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": [{\n \"name\": \"first_sub_checklist_answer\"\n }],\n }],\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ NER with nested classifications ########\n", - "\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"ner_with_checklist_subclass\",\n", - " text_selections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - ")\n", - 
"\n", - "ner_with_checklist_subclass_annotation_ndjson = {\n", - " \"name\":\n", - " \"ner_with_checklist_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " }],\n", - " }],\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\"],\n", - " \"groupId\": \"\",\n", - " \"page\": 1\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### Relationships ##########\nentity_source = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_target = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=entity_source,\n target=entity_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source = str(uuid.uuid4())\nuuid_target = str(uuid.uuid4())\n\nentity_source_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_source,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\n\nentity_target_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_target,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\nner_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source,\n \"target\": uuid_target,\n \"type\": \"unidirectional\",\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### Relationships ##########\n", - "entity_source = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "entity_target = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "entity_relationship = lb_types.RelationshipAnnotation(\n", - " name=\"relationship\",\n", - " value=lb_types.Relationship(\n", - " source=entity_source,\n", - " target=entity_target,\n", - " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ),\n", - ")\n", - "\n", - "## Only supported for MAL imports\n", - "uuid_source = str(uuid.uuid4())\n", - "uuid_target = str(uuid.uuid4())\n", - "\n", - "entity_source_ndjson = {\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"uuid\":\n", - " uuid_source,\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\"],\n", - " \"groupId\": \"\",\n", - " \"page\": 1\n", - " }],\n", - "}\n", - "\n", - "entity_target_ndjson = {\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"uuid\":\n", - " uuid_target,\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\"],\n", - " \"groupId\": \"\",\n", - " \"page\": 1\n", - " }],\n", - "}\n", - "ner_relationship_annotation_ndjson = {\n", - " \"name\": 
\"relationship\",\n", - " \"relationship\": {\n", - " \"source\": uuid_source,\n", - " \"target\": uuid_target,\n", - " \"type\": \"unidirectional\",\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### BBOX with relationships #############\n# Python Annotation\nbbox_source = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n end=lb_types.Point(x=270.907,\n y=149.556), # x = left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_target = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=96.424, y=66.251),\n end=lb_types.Point(x=179.074, y=146.932),\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=bbox_source,\n target=bbox_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source_2 = str(uuid.uuid4())\nuuid_target_2 = str(uuid.uuid4())\n\nbbox_source_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": uuid_source_2,\n \"bbox\": {\n \"top\": 68.875,\n \"left\": 188.257,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_target_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": uuid_target_2,\n \"bbox\": {\n \"top\": 66.251,\n \"left\": 96.424,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source_2,\n \"target\": uuid_target_2,\n \"type\": \"unidirectional\",\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### BBOX with relationships #############\n", - "# Python Annotation\n", - "bbox_source = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n", - " end=lb_types.Point(x=270.907,\n", - " y=149.556), # x = left + width , y = top + height\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - ")\n", - "\n", - "bbox_target = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=96.424, y=66.251),\n", - " end=lb_types.Point(x=179.074, y=146.932),\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - ")\n", - "\n", - "bbox_relationship = lb_types.RelationshipAnnotation(\n", - " name=\"relationship\",\n", - " value=lb_types.Relationship(\n", - " source=bbox_source,\n", - " target=bbox_target,\n", - " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ),\n", - ")\n", - "\n", - "## Only supported for MAL imports\n", - "uuid_source_2 = str(uuid.uuid4())\n", - "uuid_target_2 = str(uuid.uuid4())\n", - "\n", - "bbox_source_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"uuid\": uuid_source_2,\n", - " \"bbox\": {\n", - " \"top\": 68.875,\n", - " \"left\": 188.257,\n", - " \"height\": 80.681,\n", - " \"width\": 82.65\n", - " },\n", - " \"page\": 1,\n", - " \"unit\": \"POINTS\",\n", - "}\n", - "\n", - "bbox_target_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"uuid\": uuid_target_2,\n", - " \"bbox\": {\n", - " \"top\": 66.251,\n", - " \"left\": 
96.424,\n", - " \"height\": 80.681,\n", - " \"width\": 82.65\n", - " },\n", - " \"page\": 1,\n", - " \"unit\": \"POINTS\",\n", - "}\n", - "\n", - "bbox_relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\",\n", - " \"relationship\": {\n", - " \"source\": uuid_source_2,\n", - " \"target\": uuid_target_2,\n", - " \"type\": \"unidirectional\",\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Passing a `text_layer_url` is not longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", @@ -599,206 +200,60 @@ "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. As a result, all tokens in the text layer are also rotated by 90 degrees.\n", "\n", "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "global_key = \"0801.3483_doc.pdf\" + str(uuid.uuid4())\nimg_url = {\n \"row_data\": {\n \"pdf_url\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n },\n \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "global_key = \"0801.3483_doc.pdf\" + str(uuid.uuid4())\n", - "img_url = {\n", - " \"row_data\": {\n", - " \"pdf_url\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", - " },\n", - " \"global_key\": global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", - "task = dataset.create_data_rows([img_url])\n", - "task.wait_till_done()\n", - "print(f\"Failed data rows: {task.failed_data_rows}\")\n", - "print(f\"Errors: {task.errors}\")\n", - "\n", - "if task.errors:\n", - " for error in task.errors:\n", - " if (\"Duplicate global key\" in error[\"message\"] and\n", - " dataset.row_count == 0):\n", - " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", - " print(f\"Deleting empty dataset: {dataset}\")\n", - " dataset.delete()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an Ontology for your project\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n 
class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "## Setup the ontology and link the tools created above.\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " 
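One detail worth underlining in the ontology cell above: every `name` used when building annotations must match a feature declared here, or the Step 5 import will reject those annotations. A small illustrative check over the tool names defined in this cell:

```python
defined_tools = {tool.name for tool in ontology_builder.tools}
expected = {"bounding_box", "named_entity", "relationship",
            "ner_with_checklist_subclass", "bbox_with_radio_subclass"}
assert expected <= defined_tools, f"missing tools: {expected - defined_tools}"
```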
name=\"nested_radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " tools=[ # List of Tool objects\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", - " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", - " lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.NER,\n", - " name=\"ner_with_checklist_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_sub_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"second_sub_radio_question\",\n", - " options=[\n", - " lb.Option(\"second_sub_radio_answer\")\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Document Annotation Import Demo\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Document,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Creating a labeling project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"PDF_annotation_demo\",\n media_type=lb.MediaType.Document)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Labelbox project\n", - "project = client.create_project(name=\"PDF_annotation_demo\",\n", - " media_type=lb.MediaType.Document)\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " 
\"PDF_annotation_batch\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5. Create the annotation payload\n", @@ -807,357 +262,124 @@ "Labelbox support NDJSON only for this data type.\n", "\n", "The resulting label should have exactly the same content for annotations that are supported by both (with exception of the uuid strings that are generated)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### Step 5.1: First, we need to populate the text selections for Entity annotations\n", "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "To extract the generated text layer url we first need to export the data row" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "client.enable_experimental = True\ntask = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_buffered_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = output.json\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)", + "cell_type": "code", "outputs": [], - "source": [ - "client.enable_experimental = True\n", - "task = lb.DataRow.export(client=client, global_keys=[global_key])\n", - "task.wait_till_done()\n", - "stream = task.get_buffered_stream()\n", - "\n", - "text_layer = \"\"\n", - "for output in stream:\n", - " output_json = output.json\n", - " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n", - "print(text_layer)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n \"tokenIds\": list_tokens,\n \"page\": page\n }]\n })\n\n\n# Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n \"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n \"Organic charge transfer salts based on the donor\",\n \"the experimental investigations on this issue have not\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\ntext_selections_source = []\ntext_selections_target = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_annotations_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_annotation_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[2]:\n relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n text_selection_entity_source = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n text_selections_source.append(text_selection_entity_source)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entity_source_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_source, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[3]:\n relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n text_selection_entity_target = lb_types.DocumentTextSelection(\n group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n text_selections_target.append(text_selection_entity_target)\n # build text selections forthe NDJson annotations\n update_text_selections(\n annotation=entity_target_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_target, # ids representing individual words from the group\n page=1,\n )", + "cell_type": "code", "outputs": [], - "source": [ - "# Helper method\n", - "def update_text_selections(annotation, group_id, list_tokens, page):\n", - " return annotation.update({\n", - " \"textSelections\": [{\n", - " \"groupId\": group_id,\n", - " \"tokenIds\": list_tokens,\n", - " \"page\": page\n", - " }]\n", - " })\n", - "\n", - "\n", - "# Fetch the content of the text layer\n", - "res = requests.get(text_layer)\n", - "\n", - "# Phrases that we want to annotation obtained from the text layer url\n", - "content_phrases = [\n", - " \"Metal-insulator (MI) transitions have been one of the\",\n", - " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", - " \"Organic charge transfer salts based on the donor\",\n", - " \"the experimental investigations on this issue have not\",\n", - "]\n", - "\n", - "# Parse the text layer\n", - "text_selections = []\n", - "text_selections_ner = []\n", - "text_selections_source = []\n", - "text_selections_target = []\n", - "\n", - "for obj in json.loads(res.text):\n", - " for group in obj[\"groups\"]:\n", - " if group[\"content\"] == content_phrases[0]:\n", - " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " document_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", - " text_selections.append(document_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=entities_annotations_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[1]:\n", - " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " ner_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", - " text_selections_ner.append(ner_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=ner_with_checklist_subclass_annotation_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens_2, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[2]:\n", - " relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " text_selection_entity_source = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n", - " text_selections_source.append(text_selection_entity_source)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=entity_source_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " relationship_source, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[3]:\n", - " relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " text_selection_entity_target = lb_types.DocumentTextSelection(\n", - " group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n", - " text_selections_target.append(text_selection_entity_target)\n", - " # build text selections forthe NDJson annotations\n", - " update_text_selections(\n", - " annotation=entity_target_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " relationship_target, # ids representing individual words from the group\n", - " page=1,\n", - " )" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Re-write the python annotations to include text selections (only required for python annotation types)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# re-write the entity 
annotation with text selections\nentities_annotation_document_entity = lb_types.DocumentEntity(\n    name=\"named_entity\", textSelections=text_selections)\nentities_annotation = lb_types.ObjectAnnotation(\n    name=\"named_entity\", value=entities_annotation_document_entity)\n\n# re-write the entity annotation + subclassification with text selections\nclassifications = [\n    lb_types.ClassificationAnnotation(\n        name=\"sub_checklist_question\",\n        value=lb_types.Checklist(answer=[\n            lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n        ]),\n    )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n    name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n    name=\"ner_with_checklist_subclass\",\n    value=ner_annotation_with_subclass,\n    classifications=classifications,\n)\n\n# re-write the entity source and target annotations with text selections\nentity_source_doc = lb_types.DocumentEntity(\n    name=\"named_entity\", textSelections=text_selections_source)\nentity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n                                          value=entity_source_doc)\n\nentity_target_doc = lb_types.DocumentEntity(\n    name=\"named_entity\", textSelections=text_selections_target)\nentity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n                                          value=entity_target_doc)\n\n# re-write the entity relationship with the re-created entities\nentity_relationship = lb_types.RelationshipAnnotation(\n    name=\"relationship\",\n    value=lb_types.Relationship(\n        source=entity_source,\n        target=entity_target,\n        type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n    ),\n)",
+   "cell_type": "code",
    "outputs": [],
-   "source": [
-    "# re-write the entity annotation with text selections\n",
-    "entities_annotation_document_entity = lb_types.DocumentEntity(\n",
-    "    name=\"named_entity\", textSelections=text_selections)\n",
-    "entities_annotation = lb_types.ObjectAnnotation(\n",
-    "    name=\"named_entity\", value=entities_annotation_document_entity)\n",
-    "\n",
-    "# re-write the entity annotation + subclassification with text selections\n",
-    "classifications = [\n",
-    "    lb_types.ClassificationAnnotation(\n",
-    "        name=\"sub_checklist_question\",\n",
-    "        value=lb_types.Checklist(answer=[\n",
-    "            lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n",
-    "        ]),\n",
-    "    )\n",
-    "]\n",
-    "ner_annotation_with_subclass = lb_types.DocumentEntity(\n",
-    "    name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\n",
-    "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n",
-    "    name=\"ner_with_checklist_subclass\",\n",
-    "    value=ner_annotation_with_subclass,\n",
-    "    classifications=classifications,\n",
-    ")\n",
-    "\n",
-    "# re-write the entity source and target annotations withe text selectios\n",
-    "entity_source_doc = lb_types.DocumentEntity(\n",
-    "    name=\"named_entity\", text_selections=text_selections_source)\n",
-    "entity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n",
-    "                                          value=entity_source_doc)\n",
-    "\n",
-    "entity_target_doc = lb_types.DocumentEntity(\n",
-    "    name=\"named_entity\", text_selections=text_selections_target)\n",
-    "entity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n",
-    "                                          value=entity_target_doc)\n",
-    "\n",
-    "# re-write the entity relationship with the re-created entities\n",
-    "entity_relationship = lb_types.RelationshipAnnotation(\n",
-    "    name=\"relationship\",\n",
-    "    value=lb_types.Relationship(\n",
-    "        source=entity_source,\n",
-    "        target=entity_target,\n",
-    "        type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n",
-    "    ),\n",
-    ")"
-   ]
+   "execution_count": null
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
+   "source": "# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\nprint(f\"entities_annotation={entities_annotation}\")\nprint(\n    f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\nprint(f\"entity_source_ndjson={entity_source_ndjson}\")\nprint(f\"entity_target_ndjson={entity_target_ndjson}\")\nprint(f\"entity_source={entity_source}\")\nprint(f\"entity_target={entity_target}\")",
+   "cell_type": "code",
    "outputs": [],
-   "source": [
-    "# Final NDJSON and python annotations\n",
-    "print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n",
-    "print(f\"entities_annotation={entities_annotation}\")\n",
-    "print(\n",
-    "    f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n",
-    ")\n",
-    "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\n",
-    "print(f\"entity_source_ndjson={entity_source_ndjson}\")\n",
-    "print(f\"entity_target_ndjson={entity_target_ndjson}\")\n",
-    "print(f\"entity_source={entity_source}\")\n",
-    "print(f\"entity_target={entity_target}\")"
-   ]
+   "execution_count": null
  },
  {
-   "cell_type": "markdown",
    "metadata": {},
    "source": [
     "#### Python annotation\n",
     "Here we create the complete label payload using only the Python annotation format. There is one annotation for each reference to an annotation that we created. Note that only a handful of Python annotation types are supported for PDF documents."
-   ]
+   ],
+   "cell_type": "markdown"
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
+   "source": "labels = []\n\nlabels.append(\n    lb_types.Label(\n        data={\"global_key\": global_key},\n        annotations=[\n            entities_annotation,\n            checklist_annotation,\n            nested_checklist_annotation,\n            text_annotation,\n            radio_annotation,\n            nested_radio_annotation,\n            bbox_annotation,\n            bbox_with_radio_subclass_annotation,\n            ner_with_checklist_subclass_annotation,\n            entity_source,\n            entity_target,\n            entity_relationship,  # Only supported for MAL imports\n            bbox_source,\n            bbox_target,\n            bbox_relationship,  # Only supported for MAL imports\n        ],\n    ))",
+   "cell_type": "code",
    "outputs": [],
-   "source": [
-    "labels = []\n",
-    "\n",
-    "labels.append(\n",
-    "    lb_types.Label(\n",
-    "        data={\"global_key\": global_key},\n",
-    "        annotations=[\n",
-    "            entities_annotation,\n",
-    "            checklist_annotation,\n",
-    "            nested_checklist_annotation,\n",
-    "            text_annotation,\n",
-    "            radio_annotation,\n",
-    "            nested_radio_annotation,\n",
-    "            bbox_annotation,\n",
-    "            bbox_with_radio_subclass_annotation,\n",
-    "            ner_with_checklist_subclass_annotation,\n",
-    "            entity_source,\n",
-    "            entity_target,\n",
-    "            entity_relationship,  # Only supported for MAL imports\n",
-    "            bbox_source,\n",
-    "            bbox_target,\n",
-    "            bbox_relationship,  # Only supported for MAL imports\n",
-    "        ],\n",
-    "    ))"
-   ]
+   "execution_count": null
  },
  {
-   "cell_type": "markdown",
    "metadata": {},
    "source": [
     "#### NDJson annotations\n",
     "Here we create the complete label payload using only the NDJSON format. There is one annotation for each reference to an annotation that we created above."
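For orientation, each NDJSON annotation in this payload is a plain Python dict keyed by the feature `name` from the ontology. A minimal sketch of the shape the `update_text_selections` helper above produces for an entity — the group and token ids here are hypothetical placeholders, not values from a real text layer:

```python
# Hypothetical ids for illustration only; real group/token ids come from the
# exported text layer parsed earlier in this notebook.
entities_annotations_ndjson_example = {
    "name": "named_entity",
    "textSelections": [{
        "groupId": "<group-id-from-text-layer>",
        "tokenIds": ["<token-id-1>", "<token-id-2>"],
        "page": 1,
    }],
}
```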
-   ]
+   ],
+   "cell_type": "markdown"
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
+   "source": "label_ndjson = []\nfor annot in [\n    entities_annotations_ndjson,\n    checklist_annotation_ndjson,\n    nested_checklist_annotation_ndjson,\n    text_annotation_ndjson,\n    radio_annotation_ndjson,\n    nested_radio_annotation_ndjson,\n    bbox_annotation_ndjson,\n    bbox_with_radio_subclass_annotation_ndjson,\n    ner_with_checklist_subclass_annotation_ndjson,\n    entity_source_ndjson,\n    entity_target_ndjson,\n    ner_relationship_annotation_ndjson,  # Only supported for MAL imports\n    bbox_source_ndjson,\n    bbox_target_ndjson,\n    bbox_relationship_annotation_ndjson,  # Only supported for MAL imports\n]:\n    annot.update({\n        \"dataRow\": {\n            \"globalKey\": global_key\n        },\n    })\n    label_ndjson.append(annot)",
+   "cell_type": "code",
    "outputs": [],
-   "source": [
-    "label_ndjson = []\n",
-    "for annot in [\n",
-    "    entities_annotations_ndjson,\n",
-    "    checklist_annotation_ndjson,\n",
-    "    nested_checklist_annotation_ndjson,\n",
-    "    text_annotation_ndjson,\n",
-    "    radio_annotation_ndjson,\n",
-    "    nested_radio_annotation_ndjson,\n",
-    "    bbox_annotation_ndjson,\n",
-    "    bbox_with_radio_subclass_annotation_ndjson,\n",
-    "    ner_with_checklist_subclass_annotation_ndjson,\n",
-    "    entity_source_ndjson,\n",
-    "    entity_target_ndjson,\n",
-    "    ner_relationship_annotation_ndjson,  # Only supported for MAL imports\n",
-    "    bbox_source_ndjson,\n",
-    "    bbox_target_ndjson,\n",
-    "    bbox_relationship_annotation_ndjson,  # Only supported for MAL imports\n",
-    "]:\n",
-    "    annot.update({\n",
-    "        \"dataRow\": {\n",
-    "            \"globalKey\": global_key\n",
-    "        },\n",
-    "    })\n",
-    "    label_ndjson.append(annot)"
-   ]
+   "execution_count": null
  },
  {
-   "cell_type": "markdown",
    "metadata": {},
    "source": [
     "### Step 6: Import the annotation payload\n",
     "For the purpose of this tutorial, import only one of the annotation payloads at a time (NDJSON or Python annotation types)."
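Before importing, it can help to confirm the payload really is valid NDJSON (one JSON object per line) and that every value is JSON-serializable. A minimal sketch, assuming the `label_ndjson` list built above; the file name is arbitrary:

```python
import json

# Write one JSON object per line -- the NDJSON convention -- so the payload
# can be inspected or diffed before it is imported.
with open("pdf_annotations.ndjson", "w") as f:
    for annot in label_ndjson:
        f.write(json.dumps(annot) + "\n")
```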
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Option A: Upload to a labeling project as pre-labels (MAL)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n", - " predictions=labels,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Option B: Upload to a labeling project using ground truth" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Uncomment this code when excluding relationships from label import\n## Relationships are not currently supported for label import\n\n# upload_job = lb.LabelImport.create_from_objects(\n# client = client,\n# project_id = project.uid,\n# name=\"label_import_job\"+str(uuid.uuid4()),\n# labels=labels) ## Remove unsupported relationships from the labels list\n\n# print(\"Errors:\", upload_job.errors)\n# print(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Uncomment this code when excluding relationships from label import\n", - "## Relationships are not currently supported for label import\n", - "\n", - "# upload_job = lb.LabelImport.create_from_objects(\n", - "# client = client,\n", - "# project_id = project.uid,\n", - "# name=\"label_import_job\"+str(uuid.uuid4()),\n", - "# labels=labels) ## Remove unsupported relationships from the labels list\n", - "\n", - "# print(\"Errors:\", upload_job.errors)\n", - "# print(\"Status of uploads: \", upload_job.statuses)" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + ] +} \ No newline at end of file diff --git a/examples/basics/batches.ipynb b/examples/basics/batches.ipynb index f111bdce6..870dcbb23 100644 --- a/examples/basics/batches.ipynb +++ b/examples/basics/batches.ipynb @@ -1,504 +1,307 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Batches" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook covers the basics of batches:\n", - "\n", - "* A batch is collection of data rows.\n", - "* A data row cannot be part of more than one batch in a given project.\n", - "* Batches work for all data types, but there can only be one data type per project.\n", - "* Batches can not be shared between projects.\n", - "* Batches may have data rows from multiple 
datasets.\n", - "* Currently, only benchmarks quality settings is supported in batch projects\n", - "* You can set the priority for each batch." - ] - }, - { - "cell_type": "markdown", - "id": "b5641a6d", - "metadata": {}, - "source": [ - "## Set up" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install -q --upgrade \"labelbox[data]\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import labelbox as lb\n", - "import random\n", - "import uuid\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## API key and client\n", - "Provide a valid API key below to connect to the Labelbox client properly. For more information, please review the [Create API key](https://docs.labelbox.com/reference/create-api-key) guide." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "API_KEY = None\n", - "client = lb.Client(api_key=API_KEY)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a dataset and data rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "62639b4f", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a dataset\n", - "dataset = client.create_dataset(name=\"Demo-Batches-Colab\")\n", - "\n", - "uploads = []\n", - "# Generate data rows\n", - "for i in range(1, 9):\n", - " uploads.append(\n", - " {\n", - " \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", - " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1(),\n", - " }\n", - " )\n", - "\n", - "data_rows = dataset.create_data_rows(uploads)\n", - "data_rows.wait_till_done()\n", - "print(\"ERRORS: \", data_rows.errors)\n", - "print(\"RESULT URL: \", data_rows.result_url)" - ] - }, - { - "cell_type": "markdown", - "id": "d2384377", - "metadata": {}, - "source": [ - "## Setup batch project" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ddabec74", - "metadata": {}, - "outputs": [], - "source": [ - "project = client.create_project(\n", - " name=\"Demo-Batches-Project\", media_type=lb.MediaType.Image\n", - ")\n", - "print(\"Project Name: \", project.name, \"Project ID: \", project.uid)" - ] - }, - { - "cell_type": "markdown", - "id": "6a242cc8", - "metadata": {}, - "source": [ - "## Create batches" - ] - }, - { - "cell_type": "markdown", - "id": "bf21de1c", - "metadata": {}, - "source": [ - "### Select all data rows from the dataset\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23edfe21", - "metadata": {}, - "outputs": [], - "source": [ - "export_task = dataset.export()\n", - "export_task.wait_till_done()\n", - "\n", - "data_rows = []\n", - "\n", - "\n", - "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", - " data_row = output.json\n", - " data_rows.append(data_row)\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error)\n", - " )\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT\n", - " ).start(stream_handler=json_stream_handler)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f90a541c", - "metadata": {}, - "outputs": [], - "source": [ - 
"global_keys = [data_row[\"data_row\"][\"global_key\"] for data_row in data_rows]\n", - "print(\"Number of global keys:\", len(global_keys))" - ] - }, - { - "cell_type": "markdown", - "id": "1676b642", - "metadata": {}, - "source": [ - "### Select a random sample\n", - "This method is useful if you have large datasets and only want to work with a handful of data rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "591c131a", - "metadata": {}, - "outputs": [], - "source": [ - "sample = random.sample(global_keys, 4)" - ] - }, - { - "cell_type": "markdown", - "id": "be5d70cb", - "metadata": {}, - "source": [ - "### Create a batch\n", - "This method takes in a list of either data row IDs or `DataRow` objects into a `data_rows` argument or global keys into a `global_keys` argument, but both approaches cannot be used in the same method." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e1683234", - "metadata": {}, - "outputs": [], - "source": [ - "batch = project.create_batch(\n", - " name=\"Demo-First-Batch\", # Each batch in a project must have a unique name\n", - " global_keys=sample, # A list of data rows or data row ids\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "# number of data rows in the batch\n", - "print(\"Number of data rows in batch: \", batch.size)" - ] - }, - { - "cell_type": "markdown", - "id": "0937b16c", - "metadata": {}, - "source": [ - "### Create multiple batches\n", - "The `project.create_batches()` method accepts up to 1 million data rows. Batches are chunked into groups of 100k if necessary, which is the maximum batch size. This method takes in a list of either data row IDs or `DataRow` objects into a `data_rows` argument or global keys into a `global_keys` argument, but both approaches cannot be used in the same method.\n", - "\n", - "This method takes in a list of either data row IDs or `DataRow` objects into a `data_rows` argument or global keys into a `global_keys` argument, but both approaches cannot be used in the same method. Batches will be created with the specified `name_prefix` argument and a unique suffix to ensure unique batch names. The suffix will be a 4-digit number starting at `0000`.\n", - "\n", - "For example, if the name prefix is `demo-create-batches-` and three batches are created, the names will be `demo-create-batches-0000`, `demo-create-batches-0001`, and `demo-create-batches-0002`. This method will throw an error if a batch with the same name already exists.\n", - "\n", - "In the code below, only one batch will be created, since we are only using the few data rows we created above. Creating over 100k data rows for this demonstration is not sensible, but this method is the preferred approach for batch creation as it will gracefully handle massive sets of data rows." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e07f8bc8", - "metadata": {}, - "outputs": [], - "source": [ - "# First, we must create a second project so that we can re-use the data rows we already created.\n", - "second_project = client.create_project(\n", - " name=\"Second-Demo-Batches-Project\", media_type=lb.MediaType.Image\n", - ")\n", - "print(\"Project Name: \", second_project.name, \"Project ID: \", second_project.uid)\n", - "\n", - "# Then, use the method that will create multiple batches if necessary.\n", - "task = second_project.create_batches(\n", - " name_prefix=\"demo-create-batches-\", global_keys=global_keys, priority=5\n", - ")\n", - "\n", - "print(\"Errors: \", task.errors())\n", - "print(\"Result: \", task.result())" - ] - }, - { - "cell_type": "markdown", - "id": "8b094283", - "metadata": {}, - "source": [ - "### Create batches from a dataset\n", - "\n", - "If you wish to create batches in a project using all the data rows of a dataset, instead of having to gather global keys or ID and using subsets of data rows, you can use the `project.create_batches_from_dataset()` method. This method takes in a dataset ID and creates a batch (or batches if there are more than 100k data rows) comprised of all data rows not already in the project.\n", - "\n", - "The same logic applies to the `name_prefix` argument and the naming of batches as described in the section immediately above." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "597ad3c6", - "metadata": {}, - "outputs": [], - "source": [ - "# First, we must create a third project so that we can re-use the data rows we already created.\n", - "third_project = client.create_project(\n", - " name=\"Third-Demo-Batches-Project\", media_type=lb.MediaType.Image\n", - ")\n", - "print(\"Project Name: \", third_project.name, \"Project ID: \", third_project.uid)\n", - "\n", - "# Then, use the method to create batches from a dataset.\n", - "task = third_project.create_batches_from_dataset(\n", - " name_prefix=\"demo-batches-from-dataset-\", dataset_id=dataset.uid, priority=5\n", - ")\n", - "\n", - "print(\"Errors: \", task.errors())\n", - "print(\"Result: \", task.result())" - ] - }, - { - "cell_type": "markdown", - "id": "bf36d1f4", - "metadata": {}, - "source": [ - "## Manage Batches\n", - "Note: You can view your batch data through the **Data Rows** tab." - ] - }, - { - "cell_type": "markdown", - "id": "920f0336", - "metadata": {}, - "source": [ - "### Export Batches" - ] - }, - { - "cell_type": "markdown", - "id": "646f4bd7", - "metadata": {}, - "source": [ - "Batches will need to be exported from your project as a export parameter. Before you can export from a project you will need an ontology attached." 
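Given the naming scheme described above (the `name_prefix` plus a zero-padded 4-digit suffix starting at `0000`), the batch names that will be created can be predicted before the call. A short sketch of that scheme — the prefix and count here are just examples:

```python
# Reproduce the documented naming scheme: prefix + zero-padded 4-digit suffix.
name_prefix = "demo-batches-from-dataset-"  # example prefix
expected_names = [f"{name_prefix}{i:04d}" for i in range(3)]
print(expected_names)
# ['demo-batches-from-dataset-0000', 'demo-batches-from-dataset-0001', 'demo-batches-from-dataset-0002']
```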
- ] - }, - { - "cell_type": "markdown", - "id": "9292f4a2", - "metadata": {}, - "source": [ - "#### Create and Attach Ontology to Project" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1a11b9b", - "metadata": {}, - "outputs": [], - "source": [ - "classification_features = [\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"Quality Issues\",\n", - " options=[\n", - " lb.Option(value=\"blurry\", label=\"Blurry\"),\n", - " lb.Option(value=\"distorted\", label=\"Distorted\"),\n", - " ],\n", - " )\n", - "]\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " tools=[], classifications=classification_features\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Ontology from new features\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Image,\n", - ")\n", - "\n", - "project.setup_editor(ontology)" - ] - }, - { - "cell_type": "markdown", - "id": "c8e6723d", - "metadata": {}, - "source": [ - "#### Export from Project" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "506f8c9c", - "metadata": {}, - "outputs": [], - "source": [ - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"performance_details\": True,\n", - " \"batch_ids\": [\n", - " batch.uid\n", - " ], # Include batch ids if you only want to export specific batches, otherwise,\n", - " # you can export all the data without using this parameter\n", - "}\n", - "filters = {}\n", - "\n", - "# A task is returned, this provides additional information about the status of your task, such as\n", - "# any errors encountered\n", - "export_task = project.export(params=export_params, filters=filters)\n", - "export_task.wait_till_done()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a3ba9797", - "metadata": {}, - "outputs": [], - "source": [ - "data_rows = []\n", - "\n", - "\n", - "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", - " data_row = output.json\n", - " data_rows.append(data_row)\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error)\n", - " )\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT\n", - " ).start(stream_handler=json_stream_handler)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "## Export the data row iDs\n", - "data_rows = [dr for dr in data_rows]\n", - "print(\"Data rows in batch: \", data_rows)\n", - "\n", - "## List the batches in your project\n", - "for batch in project.batches():\n", - " print(\"Batch name: \", batch.name, \" Batch ID:\", batch.uid)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Archive a batch" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Archiving a batch removes all queued data rows in the batch from the project\n", - "batch.remove_queued_data_rows()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Clean up\n", - "Uncomment and run the cell below to optionally delete Labelbox objects created." 
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# batch.delete()\n",
-    "# project.delete()\n",
-    "# dataset.delete()"
-   ]
-  }
- ],
- "metadata": {
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
+ "nbformat": 4,
+ "nbformat_minor": 5,
+ "metadata": {},
+ "cells": [
+  {
+   "metadata": {},
+   "source": [
+    "",
+    " ",
+    "\n"
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": [
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    ""
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": [
+    "# Batches"
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": [
+    "This notebook covers the basics of batches:\n",
+    "\n",
+    "* A batch is a collection of data rows.\n",
+    "* A data row cannot be part of more than one batch in a given project.\n",
+    "* Batches work for all data types, but there can only be one data type per project.\n",
+    "* Batches cannot be shared between projects.\n",
+    "* Batches may have data rows from multiple datasets.\n",
+    "* Currently, only the benchmark quality setting is supported in batch projects.\n",
+    "* You can set the priority for each batch."
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": [
+    "## Set up"
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": "%pip install -q --upgrade \"labelbox[data]\"",
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "source": "import labelbox as lb\nimport random\nimport uuid\nimport json",
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "source": [
+    "## API key and client\n",
+    "Provide a valid API key below to connect to the Labelbox client properly. For more information, please review the [Create API key](https://docs.labelbox.com/reference/create-api-key) guide."
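If you would rather not paste the key into the notebook, it can be read from an environment variable before constructing the client. A minimal sketch; the variable name `LABELBOX_API_KEY` is an assumption of this example, not something the SDK requires:

```python
import os

# Hypothetical variable name -- use whatever matches your shell configuration.
API_KEY = os.environ.get("LABELBOX_API_KEY")
client = lb.Client(api_key=API_KEY)
```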
+ ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "API_KEY = None\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Create a dataset and data rows" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# Create a dataset\ndataset = client.create_dataset(name=\"Demo-Batches-Colab\")\n\nuploads = []\n# Generate data rows\nfor i in range(1, 9):\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n \"TEST-ID-%id\" % uuid.uuid1(),\n })\n\ndata_rows = dataset.create_data_rows(uploads)\ndata_rows.wait_till_done()\nprint(\"ERRORS: \", data_rows.errors)\nprint(\"RESULT URL: \", data_rows.result_url)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Setup batch project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "project = client.create_project(name=\"Demo-Batches-Project\",\n media_type=lb.MediaType.Image)\nprint(\"Project Name: \", project.name, \"Project ID: \", project.uid)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Create batches" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "### Select all data rows from the dataset\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "export_task = dataset.export()\nexport_task.wait_till_done()\n\ndata_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n data_row = output.json\n data_rows.append(data_row)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "global_keys = [data_row[\"data_row\"][\"global_key\"] for data_row in data_rows]\nprint(\"Number of global keys:\", len(global_keys))", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Select a random sample\n", + "This method is useful if you have large datasets and only want to work with a handful of data rows" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "sample = random.sample(global_keys, 4)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Create a batch\n", + "This method takes in a list of either data row IDs or `DataRow` objects into a `data_rows` argument or global keys into a `global_keys` argument, but both approaches cannot be used in the same method." 
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": "batch = project.create_batch(\n    name=\"Demo-First-Batch\",  # Each batch in a project must have a unique name\n    global_keys=sample,  # A list of data rows or data row ids\n    priority=5,  # priority between 1(Highest) - 5(lowest)\n)\n# number of data rows in the batch\nprint(\"Number of data rows in batch: \", batch.size)",
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "source": [
+    "### Create multiple batches\n",
+    "The `project.create_batches()` method accepts up to 1 million data rows. Batches are chunked into groups of 100k if necessary, which is the maximum batch size. This method takes in a list of either data row IDs or `DataRow` objects into a `data_rows` argument or global keys into a `global_keys` argument, but both approaches cannot be used in the same method.\n",
+    "\n",
+    "Batches will be created with the specified `name_prefix` argument and a unique suffix to ensure unique batch names. The suffix will be a 4-digit number starting at `0000`.\n",
+    "\n",
+    "For example, if the name prefix is `demo-create-batches-` and three batches are created, the names will be `demo-create-batches-0000`, `demo-create-batches-0001`, and `demo-create-batches-0002`. This method will throw an error if a batch with the same name already exists.\n",
+    "\n",
+    "In the code below, only one batch will be created, since we are only using the few data rows we created above. Creating over 100k data rows for this demonstration is not sensible, but this method is the preferred approach for batch creation as it will gracefully handle massive sets of data rows."
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": "# First, we must create a second project so that we can re-use the data rows we already created.\nsecond_project = client.create_project(name=\"Second-Demo-Batches-Project\",\n                                       media_type=lb.MediaType.Image)\nprint(\"Project Name: \", second_project.name, \"Project ID: \", second_project.uid)\n\n# Then, use the method that will create multiple batches if necessary.\ntask = second_project.create_batches(name_prefix=\"demo-create-batches-\",\n                                     global_keys=global_keys,\n                                     priority=5)\n\nprint(\"Errors: \", task.errors())\nprint(\"Result: \", task.result())",
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "source": [
+    "### Create batches from a dataset\n",
+    "\n",
+    "If you wish to create batches in a project using all the data rows of a dataset, instead of having to gather global keys or IDs and using subsets of data rows, you can use the `project.create_batches_from_dataset()` method. This method takes in a dataset ID and creates a batch (or batches if there are more than 100k data rows) comprised of all data rows not already in the project.\n",
+    "\n",
+    "The same logic applies to the `name_prefix` argument and the naming of batches as described in the section immediately above."
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": "# First, we must create a third project so that we can re-use the data rows we already created.\nthird_project = client.create_project(name=\"Third-Demo-Batches-Project\",\n                                      media_type=lb.MediaType.Image)\nprint(\"Project Name: \", third_project.name, \"Project ID: \", third_project.uid)\n\n# Then, use the method to create batches from a dataset.\ntask = third_project.create_batches_from_dataset(\n    name_prefix=\"demo-batches-from-dataset-\",\n    dataset_id=dataset.uid,\n    priority=5)\n\nprint(\"Errors: \", task.errors())\nprint(\"Result: \", task.result())",
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "source": [
+    "## Manage Batches\n",
+    "Note: You can view your batch data through the **Data Rows** tab."
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": [
+    "### Export Batches"
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": [
+    "Batches are exported from your project by passing batch IDs as an export parameter. Before you can export from a project, you will need an ontology attached."
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": [
+    "#### Create and Attach Ontology to Project"
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": "classification_features = [\n    lb.Classification(\n        class_type=lb.Classification.Type.CHECKLIST,\n        name=\"Quality Issues\",\n        options=[\n            lb.Option(value=\"blurry\", label=\"Blurry\"),\n            lb.Option(value=\"distorted\", label=\"Distorted\"),\n        ],\n    )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n                                      classifications=classification_features)\n\nontology = client.create_ontology(\n    \"Ontology from new features\",\n    ontology_builder.asdict(),\n    media_type=lb.MediaType.Image,\n)\n\nproject.setup_editor(ontology)",
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "source": [
+    "#### Export from Project"
+   ],
+   "cell_type": "markdown"
+  },
+  {
+   "metadata": {},
+   "source": "export_params = {\n    \"attachments\": True,\n    \"metadata_fields\": True,\n    \"data_row_details\": True,\n    \"project_details\": True,\n    \"performance_details\": True,\n    \"batch_ids\": [\n        batch.uid\n    ],  # Include batch ids if you only want to export specific batches, otherwise,\n    # you can export all the data without using this parameter\n}\nfilters = {}\n\n# A task is returned, this provides additional information about the status of your task, such as\n# any errors encountered\nexport_task = project.export(params=export_params, filters=filters)\nexport_task.wait_till_done()",
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "source": "data_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n    data_row = output.json\n    data_rows.append(data_row)\n\n\nif export_task.has_errors():\n    export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n        stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n    export_json = export_task.get_buffered_stream(\n        stream_type=lb.StreamType.RESULT).start(\n            stream_handler=json_stream_handler)",
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "source": "## Export the data row IDs\ndata_rows = [dr for dr in data_rows]\nprint(\"Data rows in batch: \", data_rows)\n\n## List the batches in your project\nfor batch in project.batches():\n    print(\"Batch name: \", batch.name, \" Batch ID:\", 
batch.uid)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Archive a batch" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# Archiving a batch removes all queued data rows in the batch from the project\nbatch.remove_queued_data_rows()", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Clean up\n", + "Uncomment and run the cell below to optionally delete Labelbox objects created." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# batch.delete()\n# project.delete()\n# dataset.delete()", + "cell_type": "code", + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/examples/basics/custom_embeddings.ipynb b/examples/basics/custom_embeddings.ipynb index 71d66d31f..4c483ba74 100644 --- a/examples/basics/custom_embeddings.ipynb +++ b/examples/basics/custom_embeddings.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,366 +24,263 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Custom Embeddings\n", "\n", "You can improve your data exploration and similarity search experience by adding your own custom embeddings. Labelbox allows you to upload up to 10 different custom embeddings per workspace on any kind of data. You can experiment with different embeddings to power your data selection." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Set up " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q --upgrade \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q --upgrade \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport numpy as np\nimport json\nimport uuid\nimport random", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import numpy as np\n", - "import json\n", - "import uuid\n", - "import random" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API key" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Select data rows" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "- Get images from a Labelbox dataset\n", "- To improve similarity search, you need to upload custom embeddings to at least 1,000 data rows.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "DATASET_ID = \"\"", + "cell_type": "code", "outputs": [], - "source": [ - "DATASET_ID = \"\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "dataset = 
client.get_dataset(dataset_id=DATASET_ID)\nexport_task = dataset.export()\nexport_task.wait_till_done()", + "cell_type": "code", "outputs": [], - "source": [ - "dataset = client.get_dataset(dataset_id=DATASET_ID)\n", - "export_task = dataset.export()\n", - "export_task.wait_till_done()" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "data_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n data_row = output.json\n data_rows.append(data_row)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)", + "cell_type": "code", "outputs": [], - "source": [ - "data_rows = []\n", - "\n", - "\n", - "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", - " data_row = output.json\n", - " data_rows.append(data_row)\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error))\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT).start(\n", - " stream_handler=json_stream_handler)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "data_row_dict = [{\"data_row_id\": dr[\"data_row\"][\"id\"]} for dr in data_rows]\ndata_row_dict = data_row_dict[:\n 1000] # keep the first 1000 examples for the sake of this demo", + "cell_type": "code", "outputs": [], - "source": [ - "data_row_dict = [{\"data_row_id\": dr[\"data_row\"][\"id\"]} for dr in data_rows]\n", - "data_row_dict = data_row_dict[:\n", - " 1000] # keep the first 1000 examples for the sake of this demo" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Create custom embedding payload " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Generate random vectors for embeddings (max : 2048 dimensions)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "nb_data_rows = len(data_row_dict)\nprint(\"Number of data rows: \", nb_data_rows)\n# Labelbox supports custom embedding vectors of dimension up to 2048\ncustom_embeddings = [list(np.random.random(2048)) for _ in range(nb_data_rows)]", + "cell_type": "code", "outputs": [], - "source": [ - "nb_data_rows = len(data_row_dict)\n", - "print(\"Number of data rows: \", nb_data_rows)\n", - "# Labelbox supports custom embedding vectors of dimension up to 2048\n", - "custom_embeddings = [list(np.random.random(2048)) for _ in range(nb_data_rows)]" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "List all custom embeddings available in your Labelbox workspace" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "embeddings = client.get_embeddings()", + "cell_type": "code", "outputs": [], - "source": [ - "embeddings = client.get_embeddings()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Choose an existing embedding type or create a new one" - ] + ], + "cell_type": "markdown" }, { - 
"cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Name of the custom embedding must be unique\nembedding = client.create_embedding(\"my_custom_embedding_2048_dimensions\", 2048)", + "cell_type": "code", "outputs": [], - "source": [ - "# Name of the custom embedding must be unique\n", - "embedding = client.create_embedding(\"my_custom_embedding_2048_dimensions\", 2048)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Create payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "The payload should encompass the `key` (data row id or global key) and the new embedding vector data. Note that the `dataset.upsert_data_rows()` operation will only update the values you pass in the payload; all other existing row data will not be modified." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "payload = []\nfor data_row_dict, custom_embedding in zip(data_row_dict, custom_embeddings):\n payload.append({\n \"key\":\n lb.UniqueId(data_row_dict[\"data_row_id\"]),\n \"embeddings\": [{\n \"embedding_id\": embedding.id,\n \"vector\": custom_embedding\n }],\n })\n\nprint(\"payload\", len(payload), payload[:1])", + "cell_type": "code", "outputs": [], - "source": [ - "payload = []\n", - "for data_row_dict, custom_embedding in zip(data_row_dict, custom_embeddings):\n", - " payload.append({\n", - " \"key\":\n", - " lb.UniqueId(data_row_dict[\"data_row_id\"]),\n", - " \"embeddings\": [{\n", - " \"embedding_id\": embedding.id,\n", - " \"vector\": custom_embedding\n", - " }],\n", - " })\n", - "\n", - "print(\"payload\", len(payload), payload[:1])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Upload payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Upsert data rows with custom embeddings" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "task = dataset.upsert_data_rows(payload)\ntask.wait_till_done()\nprint(task.errors)\nprint(task.status)", + "cell_type": "code", "outputs": [], - "source": [ - "task = dataset.upsert_data_rows(payload)\n", - "task.wait_till_done()\n", - "print(task.errors)\n", - "print(task.status)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Get the count of imported vectors for a custom embedding" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Count how many data rows have a specific custom embedding (this can take a couple of minutes)\ncount = embedding.get_imported_vector_count()\nprint(count)", + "cell_type": "code", "outputs": [], - "source": [ - "# Count how many data rows have a specific custom embedding (this can take a couple of minutes)\n", - "count = embedding.get_imported_vector_count()\n", - "print(count)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Delete custom embedding type" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# embedding.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# embedding.delete()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Upload custom embeddings during data row creation" - ] + ], + "cell_type": 
"markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Create a dataset" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a dataset\ndataset_new = client.create_dataset(name=\"data_rows_with_embeddings\")", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a dataset\n", - "dataset_new = client.create_dataset(name=\"data_rows_with_embeddings\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Fetch an embedding (2048 dimension)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "embedding = client.get_embedding_by_name(\"my_custom_embedding_2048_dimensions\")\nvector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)]", + "cell_type": "code", "outputs": [], - "source": [ - "embedding = client.get_embedding_by_name(\"my_custom_embedding_2048_dimensions\")\n", - "vector = [random.uniform(1.0, 2.0) for _ in range(embedding.dims)]" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Upload data rows with embeddings" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "uploads = []\n# Generate data rows\nfor i in range(1, 9):\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n \"TEST-ID-%id\" % uuid.uuid1(),\n \"embeddings\": [{\n \"embedding_id\": embedding.id,\n \"vector\": vector\n }],\n })\n\ntask1 = dataset_new.create_data_rows(uploads)\ntask1.wait_till_done()\nprint(\"ERRORS: \", task1.errors)\nprint(\"RESULTS:\", task1.result)", + "cell_type": "code", "outputs": [], - "source": [ - "uploads = []\n", - "# Generate data rows\n", - "for i in range(1, 9):\n", - " uploads.append({\n", - " \"row_data\":\n", - " f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", - " \"global_key\":\n", - " \"TEST-ID-%id\" % uuid.uuid1(),\n", - " \"embeddings\": [{\n", - " \"embedding_id\": embedding.id,\n", - " \"vector\": vector\n", - " }],\n", - " })\n", - "\n", - "task1 = dataset_new.create_data_rows(uploads)\n", - "task1.wait_till_done()\n", - "print(\"ERRORS: \", task1.errors)\n", - "print(\"RESULTS:\", task1.result)" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/examples/basics/data_rows.ipynb b/examples/basics/data_rows.ipynb index 1efb4967f..f17e6fa65 100644 --- a/examples/basics/data_rows.ipynb +++ b/examples/basics/data_rows.ipynb @@ -1,578 +1,315 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - " \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Data rows" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* Data rows are the assets that are being labeled. 
We currently support the following asset types:\n", - " * Image\n", - " * Text\n", - " * Video\n", - " * Geospatial / Tiled Imagery\n", - " * Audio\n", - " * Documents \n", - " * HTML \n", - " * DICOM \n", - " * Conversational\n", - "* A data row cannot exist without belonging to a dataset.\n", - "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install labelbox -q" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import labelbox as lb\n", - "import uuid\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# API Key and Client\n", - "Provide a valid api key below in order to properly connect to the Labelbox Client." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Get data rows from projects" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Pick a project with batches that have data rows with global keys\n", - "PROJECT_ID = \"\"\n", - "project = client.get_project(PROJECT_ID)\n", - "batches = list(project.batches())\n", - "print(batches)\n", - "# This is the same as\n", - "# -> dataset = client.get_dataset(dataset_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Fetch data rows from project's batches\n", - "\n", - "Batches will need to be exported from your project as a export parameter. Before you can export from a project you will need an ontology attached." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f576eb64", - "metadata": {}, - "outputs": [], - "source": [ - "client.enable_experimental = True\n", - "\n", - "batch_ids = [batch.uid for batch in batches]\n", - "\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"performance_details\": True,\n", - " \"batch_ids\": batch_ids, # Include batch ids if you only want to export specific batches, otherwise,\n", - " # you can export all the data without using this parameter\n", - "}\n", - "filters = {}\n", - "\n", - "# A task is returned, this provides additional information about the status of your task, such as\n", - "# any errors encountered\n", - "export_task = project.export(params=export_params, filters=filters)\n", - "export_task.wait_till_done()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d102d73e", - "metadata": {}, - "outputs": [], - "source": [ - "data_rows = []\n", - "\n", - "\n", - "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", - " data_row = output.json\n", - " data_rows.append(data_row)\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error)\n", - " )\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT\n", - " ).start(stream_handler=json_stream_handler)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "57300cab", - "metadata": {}, - "outputs": [], - "source": [ - "# Get single data row\n", - "data_row = data_rows[0]\n", - "print(data_row)" - ] - }, - { - "cell_type": "markdown", - "id": "fc079896", - "metadata": {}, - "source": [ - "### Get labels from the data row" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1ee4b998", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Associated label(s)\", data_row[\"projects\"][project.uid][\"labels\"])\n", - "print(\"Global key\", data_row[\"data_row\"][\"global_key\"])" - ] - }, - { - "cell_type": "markdown", - "id": "89edfb3c", - "metadata": {}, - "source": [ - "### Get data row ids by using global keys" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5e539795", - "metadata": {}, - "outputs": [], - "source": [ - "global_key = \"\"\n", - "task = client.get_data_row_ids_for_global_keys([global_key])\n", - "print(f\"Data row id: {task['results']}\")" - ] - }, - { - "cell_type": "markdown", - "id": "aa928b53", - "metadata": {}, - "source": [ - "## Create\n", - "We recommend the following methods to create data rows : `dataset.upsert_data_rows()`, and `dataset.create_data_rows()`, " - ] - }, - { - "cell_type": "markdown", - "id": "7342388c", - "metadata": {}, - "source": [ - "### Create data rows via `dataset.upsert_data_rows()`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "86e4e2b8", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a dataset\n", - "dataset = client.create_dataset(name=\"data_rows_demo_dataset_6\")\n", - "# You can also upload metadata along with your data row\n", - "mdo = client.get_data_row_metadata_ontology()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f52ac7a2", - "metadata": {}, - "outputs": [], - "source": [ - "uploads = []\n", - "# Generate data rows\n", - "for i in 
range(1, 8):\n", - " uploads.append(\n", - " {\n", - " \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", - " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1(),\n", - " ## add metadata (optional)\n", - " \"metadata_fields\": [\n", - " lb.DataRowMetadataField(\n", - " schema_id=mdo.reserved_by_name[\n", - " \"tag\"\n", - " ].uid, # specify the schema id\n", - " value=\"tag_string\", # typed inputs\n", - " ),\n", - " ],\n", - " \"attachments\": [\n", - " {\n", - " \"type\": \"IMAGE_OVERLAY\",\n", - " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n", - " },\n", - " {\n", - " \"type\": \"RAW_TEXT\",\n", - " \"value\": \"IOWA, Zone 2232, June 2022 [Text string]\",\n", - " },\n", - " {\n", - " \"type\": \"TEXT_URL\",\n", - " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n", - " },\n", - " {\n", - " \"type\": \"IMAGE\",\n", - " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n", - " },\n", - " {\n", - " \"type\": \"VIDEO\",\n", - " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\",\n", - " },\n", - " {\n", - " \"type\": \"HTML\",\n", - " \"value\": \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\",\n", - " },\n", - " {\n", - " \"type\": \"PDF_URL\",\n", - " \"value\": \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n", - " },\n", - " ],\n", - " }\n", - " )\n", - "\n", - "task1 = dataset.upsert_data_rows(uploads)\n", - "task1.wait_till_done()\n", - "print(\"ERRORS: \", task1.errors)\n", - "print(\"RESULTS:\", task1.result)" - ] - }, - { - "cell_type": "markdown", - "id": "7c2c4cdc", - "metadata": {}, - "source": [ - "Create data rows from data in your local path " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ed492ffc", - "metadata": {}, - "outputs": [], - "source": [ - "from PIL import Image\n", - "\n", - "# Create dummy empty jpeg file\n", - "width = 400\n", - "height = 300\n", - "color = (255, 255, 255) # White color\n", - "image = Image.new(\"RGB\", (width, height), color)\n", - "\n", - "# Save the image as a JPEG file\n", - "image.save(\"dummy.jpg\")\n", - "\n", - "local_data_path = \"dummy.jpg\"\n", - "\n", - "data = {\"row_data\": local_data_path, \"global_key\": str(uuid.uuid4())}\n", - "\n", - "task3 = dataset.upsert_data_rows([data])\n", - "task3.wait_till_done()\n", - "print(\"ERRORS: \", task3.errors)\n", - "print(\"RESULTS:\", task3.result)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "33db6b42", - "metadata": {}, - "outputs": [], - "source": [ - "# You can mix local files with urls when creating data rows\n", - "task4 = dataset.upsert_data_rows(\n", - " [\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\",\n", - " \"global_key\": str(uuid.uuid4()),\n", - " },\n", - " {\"row_data\": local_data_path, \"global_key\": str(uuid.uuid4())},\n", - " ]\n", - ")\n", - "task4.wait_till_done()\n", - "print(\"ERRORS: \", task4.errors)\n", - "print(\"RESULTS:\", task4.result)" - ] - }, - { - "cell_type": "markdown", - "id": "75a359ae", - "metadata": {}, - "source": [ - "### Create data rows via `dataset.create_data_rows()`\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"id": "3b02d317", - "metadata": {}, - "outputs": [], - "source": [ - "dataset_2 = client.create_dataset(name=\"data_rows_demo_dataset_3\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "71871d7e", - "metadata": {}, - "outputs": [], - "source": [ - "uploads = []\n", - "# Generate data rows\n", - "for i in range(1, 9):\n", - " uploads.append(\n", - " {\n", - " \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", - " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1(),\n", - " ## add metadata (optional)\n", - " \"metadata_fields\": [\n", - " lb.DataRowMetadataField(\n", - " schema_id=mdo.reserved_by_name[\n", - " \"tag\"\n", - " ].uid, # specify the schema id\n", - " value=\"tag_string\", # typed inputs\n", - " ),\n", - " ],\n", - " }\n", - " )\n", - "\n", - "task1_2 = dataset_2.create_data_rows(uploads)\n", - "task1_2.wait_till_done()\n", - "print(\"ERRORS: \", task1_2.errors)\n", - "print(\"RESULTS:\", task1_2.result)" - ] - }, - { - "cell_type": "markdown", - "id": "4ef3f18e", - "metadata": {}, - "source": [ - "### Update\n", - "`dataset.upsert_data_rows()` can also be use to update data rows\n", - "\n", - "To update data rows using this method, you need to pass a `key`, which can reference either a global key or a data row ID. Additionally, include any fields that you wish to update along with their new values.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a41ef462", - "metadata": {}, - "outputs": [], - "source": [ - "# Fetch a data row from the first dataset example\n", - "ts = dataset.export()\n", - "ts.wait_till_done()\n", - "DATA_ROW_ID = [output.json for output in ts.get_buffered_stream()][\n", - " 0\n", - "][\"data_row\"][\"id\"]\n", - "GLOBAL_KEY = [output.json for output in ts.get_buffered_stream()][\n", - " 0\n", - "][\"data_row\"][\"global_key\"]\n", - "\n", - "print(f\"Pick either a data row id : {DATA_ROW_ID} or global key: {GLOBAL_KEY}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "86628344", - "metadata": {}, - "outputs": [], - "source": [ - "# Update the global key assodicated with the DATAROW_ID or GLOBAL_KEY, and include a additional metadata\n", - "data = {\n", - " \"key\": lb.UniqueId(DATA_ROW_ID),\n", - " \"global_key\": \"NEW-ID-%id\" % uuid.uuid1(),\n", - " \"metadata_fields\": [\n", - " # New metadata\n", - " lb.DataRowMetadataField(\n", - " schema_id=mdo.reserved_by_name[\"captureDateTime\"].uid,\n", - " value=\"2000-01-01 00:00:00\",\n", - " ),\n", - " # Include original metadata otherwise it will be removed\n", - " lb.DataRowMetadataField(\n", - " schema_id=mdo.reserved_by_name[\"tag\"].uid,\n", - " value=\"tag_string\",\n", - " ),\n", - " ],\n", - "}\n", - "\n", - "task5 = dataset_2.upsert_data_rows([data])\n", - "task5.wait_till_done()\n", - "print(\"ERRORS: \", task5.errors)\n", - "print(\"RESULTS:\", task5.result)" - ] - }, - { - "cell_type": "markdown", - "id": "f9f9cb02", - "metadata": {}, - "source": [ - "### Create a single attachment on an existing data row" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3b990d63", - "metadata": {}, - "outputs": [], - "source": [ - "# You can only create one attachment at the time.\n", - "DATA_ROW_ID = \"\"\n", - "data_row = client.get_data_row(DATA_ROW_ID)\n", - "attachment = data_row.create_attachment(\n", - " attachment_type=\"RAW_TEXT\", attachment_value=\"LABELERS WILL SEE THIS\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "Update a recently created attachment " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "attachment.update(type=\"RAW_TEXT\", value=\"NEW RAW TEXT\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* Delete a single data row" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "DATAROW_ID_TO_DELETE = \"\"\n", - "data_row = client.get_data_row(DATAROW_ID_TO_DELETE)\n", - "data_row.delete()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* Bulk delete data row objects" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Bulk delete a list of data_rows ( limit: 4K data rows per call)\n", - "lb.DataRow.bulk_delete(list(dataset.data_rows()))" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, + "cells": [ + { + "metadata": {}, + "source": [ + "", + " ", + "\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Data rows" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "* Data rows are the assets that are being labeled. We currently support the following asset types:\n", + " * Image\n", + " * Text\n", + " * Video\n", + " * Geospatial / Tiled Imagery\n", + " * Audio\n", + " * Documents \n", + " * HTML \n", + " * DICOM \n", + " * Conversational\n", + "* A data row cannot exist without belonging to a dataset.\n", + "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "%pip install labelbox -q", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "import labelbox as lb\nimport uuid\nimport json", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# API Key and Client\n", + "Provide a valid api key below in order to properly connect to the Labelbox Client." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Get data rows from projects" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# Pick a project with batches that have data rows with global keys\nPROJECT_ID = \"\"\nproject = client.get_project(PROJECT_ID)\nbatches = list(project.batches())\nprint(batches)\n# This is the same as\n# -> dataset = client.get_dataset(dataset_id)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Fetch data rows from project's batches\n", + "\n", + "Batches will need to be exported from your project as a export parameter. Before you can export from a project you will need an ontology attached." 
+ ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "client.enable_experimental = True\n\nbatch_ids = [batch.uid for batch in batches]\n\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"performance_details\": True,\n \"batch_ids\":\n batch_ids, # Include batch ids if you only want to export specific batches, otherwise,\n # you can export all the data without using this parameter\n}\nfilters = {}\n\n# A task is returned, this provides additional information about the status of your task, such as\n# any errors encountered\nexport_task = project.export(params=export_params, filters=filters)\nexport_task.wait_till_done()", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "data_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n data_row = output.json\n data_rows.append(data_row)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "# Get single data row\ndata_row = data_rows[0]\nprint(data_row)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Get labels from the data row" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "print(\"Associated label(s)\", data_row[\"projects\"][project.uid][\"labels\"])\nprint(\"Global key\", data_row[\"data_row\"][\"global_key\"])", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Get data row ids by using global keys" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "global_key = \"\"\ntask = client.get_data_row_ids_for_global_keys([global_key])\nprint(f\"Data row id: {task['results']}\")", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Create\n", + "We recommend the following methods to create data rows : `dataset.upsert_data_rows()`, and `dataset.create_data_rows()`, " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "### Create data rows via `dataset.upsert_data_rows()`" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# Create a dataset\ndataset = client.create_dataset(name=\"data_rows_demo_dataset_6\")\n# You can also upload metadata along with your data row\nmdo = client.get_data_row_metadata_ontology()", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "uploads = []\n# Generate data rows\nfor i in range(1, 8):\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n \"TEST-ID-%id\" % uuid.uuid1(),\n ## add metadata (optional)\n \"metadata_fields\": [\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"tag\"].\n uid, # specify the schema id\n value=\"tag_string\", # typed inputs\n ),\n ],\n \"attachments\": [\n {\n \"type\":\n \"IMAGE_OVERLAY\",\n \"value\":\n 
\"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n },\n {\n \"type\": \"RAW_TEXT\",\n \"value\": \"IOWA, Zone 2232, June 2022 [Text string]\",\n },\n {\n \"type\":\n \"TEXT_URL\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n },\n {\n \"type\":\n \"IMAGE\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n },\n {\n \"type\":\n \"VIDEO\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\",\n },\n {\n \"type\":\n \"HTML\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\",\n },\n {\n \"type\":\n \"PDF_URL\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n },\n ],\n })\n\ntask1 = dataset.upsert_data_rows(uploads)\ntask1.wait_till_done()\nprint(\"ERRORS: \", task1.errors)\nprint(\"RESULTS:\", task1.result)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "Create data rows from data in your local path " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "from PIL import Image\n\n# Create dummy empty jpeg file\nwidth = 400\nheight = 300\ncolor = (255, 255, 255) # White color\nimage = Image.new(\"RGB\", (width, height), color)\n\n# Save the image as a JPEG file\nimage.save(\"dummy.jpg\")\n\nlocal_data_path = \"dummy.jpg\"\n\ndata = {\"row_data\": local_data_path, \"global_key\": str(uuid.uuid4())}\n\ntask3 = dataset.upsert_data_rows([data])\ntask3.wait_till_done()\nprint(\"ERRORS: \", task3.errors)\nprint(\"RESULTS:\", task3.result)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "# You can mix local files with urls when creating data rows\ntask4 = dataset.upsert_data_rows([\n {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\",\n \"global_key\":\n str(uuid.uuid4()),\n },\n {\n \"row_data\": local_data_path,\n \"global_key\": str(uuid.uuid4())\n },\n])\ntask4.wait_till_done()\nprint(\"ERRORS: \", task4.errors)\nprint(\"RESULTS:\", task4.result)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Create data rows via `dataset.create_data_rows()`\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "dataset_2 = client.create_dataset(name=\"data_rows_demo_dataset_3\")", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "uploads = []\n# Generate data rows\nfor i in range(1, 9):\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n \"TEST-ID-%id\" % uuid.uuid1(),\n ## add metadata (optional)\n \"metadata_fields\": [\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"tag\"].\n uid, # specify the schema id\n value=\"tag_string\", # typed inputs\n ),\n ],\n })\n\ntask1_2 = dataset_2.create_data_rows(uploads)\ntask1_2.wait_till_done()\nprint(\"ERRORS: \", task1_2.errors)\nprint(\"RESULTS:\", task1_2.result)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Update\n", + "`dataset.upsert_data_rows()` can also be use to update data rows\n", + "\n", + 
"To update data rows using this method, you need to pass a `key`, which can reference either a global key or a data row ID. Additionally, include any fields that you wish to update along with their new values.\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# Fetch a data row from the first dataset example\nts = dataset.export()\nts.wait_till_done()\nDATA_ROW_ID = [output.json for output in ts.get_buffered_stream()\n ][0][\"data_row\"][\"id\"]\nGLOBAL_KEY = [output.json for output in ts.get_buffered_stream()\n ][0][\"data_row\"][\"global_key\"]\n\nprint(f\"Pick either a data row id : {DATA_ROW_ID} or global key: {GLOBAL_KEY}\")", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "# Update the global key assodicated with the DATAROW_ID or GLOBAL_KEY, and include a additional metadata\ndata = {\n \"key\":\n lb.UniqueId(DATA_ROW_ID),\n \"global_key\":\n \"NEW-ID-%id\" % uuid.uuid1(),\n \"metadata_fields\": [\n # New metadata\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"captureDateTime\"].uid,\n value=\"2000-01-01 00:00:00\",\n ),\n # Include original metadata otherwise it will be removed\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"tag\"].uid,\n value=\"tag_string\",\n ),\n ],\n}\n\ntask5 = dataset_2.upsert_data_rows([data])\ntask5.wait_till_done()\nprint(\"ERRORS: \", task5.errors)\nprint(\"RESULTS:\", task5.result)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Create a single attachment on an existing data row" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# You can only create one attachment at the time.\nDATA_ROW_ID = \"\"\ndata_row = client.get_data_row(DATA_ROW_ID)\nattachment = data_row.create_attachment(\n attachment_type=\"RAW_TEXT\", attachment_value=\"LABELERS WILL SEE THIS\")", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "Update a recently created attachment " + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "attachment.update(type=\"RAW_TEXT\", value=\"NEW RAW TEXT\")", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Delete" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "* Delete a single data row" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "DATAROW_ID_TO_DELETE = \"\"\ndata_row = client.get_data_row(DATAROW_ID_TO_DELETE)\ndata_row.delete()", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "* Bulk delete data row objects" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# Bulk delete a list of data_rows ( limit: 4K data rows per call)\nlb.DataRow.bulk_delete(list(dataset.data_rows()))", + "cell_type": "code", + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/examples/basics/projects.ipynb b/examples/basics/projects.ipynb index 1201c9b56..6bebba732 100644 --- a/examples/basics/projects.ipynb +++ b/examples/basics/projects.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,18 +24,18 @@ "\n", "" - ] + ], + 
"cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Projects\n", "This notebook covers the basics of projects:" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* A project can be thought of as a specific labeling task on a set of labels\n", @@ -41,440 +43,282 @@ "* Each project has an ontology which defines the types of annotations supported during the labeling process\n", "**Note that there is a lot of advanced usage that is not covered in this notebook. See examples/project_configuration/project_setup.ipynb for those functions**\n", "* Also note that deprecated functions are not explained here." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", - "id": "864da4c5", "metadata": {}, "source": [ "## Set up" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q --upgrade \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q --upgrade \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport labelbox.types as lb_types\nfrom labelbox.schema.conflict_resolution_strategy import (\n ConflictResolutionStrategy,)\nimport uuid", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import labelbox.types as lb_types\n", - "from labelbox.schema.conflict_resolution_strategy import (\n", - " ConflictResolutionStrategy,)\n", - "import uuid" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## API key and client\n", "Provide a valid API key below to connect to the Labelbox client properly. For more information, please review the [Create API key](https://docs.labelbox.com/reference/create-api-key) guide." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = None\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = None\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Create a project\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Creates an empty project\nproject = client.create_project(\n    name=\"my-test-project\",\n    description=\"a description\",\n    media_type=lb.MediaType.Image,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# Creates an empty project\n", - "project = client.create_project(\n", - "    name=\"my-test-project\",\n", - "    description=\"a description\",\n", - "    media_type=lb.MediaType.Image,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Create a dataset with data rows" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "dataset = client.create_dataset(name=\"project-demo-dataset\")\nglobal_keys = []\nuploads = []\n# Generate data rows\nfor i in range(1, 9):\n    gb_key = \"TEST-ID-%id\" % uuid.uuid1()\n    uploads.append({\n        \"row_data\":\n            f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n        \"global_key\":\n            gb_key,\n    })\n    global_keys.append(gb_key)\n\ntask = dataset.create_data_rows(uploads)\ntask.wait_till_done()\nprint(\"ERRORS: \", task.errors)\nprint(\"RESULT URL: \", task.result_url)", + "cell_type": "code", "outputs": [], - "source": [ - "dataset = client.create_dataset(name=\"project-demo-dataset\")\n", - "global_keys = []\n", - "uploads = []\n", - "# Generate data rows\n", - "for i in range(1, 9):\n", - "    gb_key = \"TEST-ID-%id\" % uuid.uuid1()\n", - "    uploads.append({\n", - "        \"row_data\":\n", - "            f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", - "        \"global_key\":\n", - "            gb_key,\n", - "    })\n", - "    global_keys.append(gb_key)\n", - "\n", - "task = dataset.create_data_rows(uploads)\n", - "task.wait_till_done()\n", - "print(\"ERRORS: \", task.errors)\n", - "print(\"RESULT URL: \", task.result_url)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Add data rows to a project \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n    \"project-demo\",  # each batch in a project must have a unique name\n    global_keys=\n        global_keys,  # paginated collection of data row objects, list of data row ids or global keys\n    priority=1,  # priority between 1(highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - "    \"project-demo\",  # each batch in a project must have a unique name\n", - "    global_keys=\n", - "    global_keys,  # paginated collection of data row objects, list of data row ids or global keys\n", - "    priority=1,  # priority between 1(highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Create tags and assign them to a project\n", "In this section, we are creating a tag in the organization and associating it with a project. 
Then we are listing the tags attached to a project.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Create a tag" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Get the organization\norganization = client.get_organization()\n\ntag = organization.create_resource_tag({\n \"text\": \"new-tag-name\",\n \"color\": \"4ed2f9\"\n})", + "cell_type": "code", "outputs": [], - "source": [ - "# Get the organization\n", - "organization = client.get_organization()\n", - "\n", - "tag = organization.create_resource_tag({\n", - " \"text\": \"new-tag-name\",\n", - " \"color\": \"4ed2f9\"\n", - "})" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Assign the tag to a project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "tags = project.update_project_resource_tags([tag.uid])", + "cell_type": "code", "outputs": [], - "source": [ - "tags = project.update_project_resource_tags([tag.uid])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Get project tags" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "tags = project.get_resource_tags()", + "cell_type": "code", "outputs": [], - "source": [ - "tags = project.get_resource_tags()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Attach ontology and label data rows\n", "\n", "In this section, we are creating an ontology to attach to a project and creating labels to import as ground truths. We need this setup to demonstrate other methods later in the demo. For more information, please reference our [Ontology](https://docs.labelbox.com/reference/ontology) and [Import Image Annotation](https://docs.labelbox.com/reference/import-image-annotations) development guides." 
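An ontology can carry annotation tools as well as classifications. A minimal sketch of a richer ontology, assuming `lb.Tool` behaves as in other Labelbox SDK examples (the `person_box` name is purely illustrative):

```python
import labelbox as lb

richer_ontology_builder = lb.OntologyBuilder(
    tools=[
        # One bounding-box tool in addition to the radio classification below
        lb.Tool(tool=lb.Tool.Type.BBOX, name="person_box"),
    ],
    classifications=[
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="radio_question",
            options=[
                lb.Option(value="first_radio_answer"),
                lb.Option(value="second_radio_answer"),
            ],
        ),
    ],
)
# richer_ontology = client.create_ontology("richer-test-ontology",
#                                          richer_ontology_builder.asdict())
```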
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Create your ontology" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create normalized json with a radio classification\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n ])\n# Creating an ontology\nontology = client.create_ontology(\"test-ontology\", ontology_builder.asdict())", + "cell_type": "code", "outputs": [], - "source": [ - "# Create normalized json with a radio classification\n", - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " ])\n", - "# Creating an ontology\n", - "ontology = client.create_ontology(\"test-ontology\", ontology_builder.asdict())" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "#### Attach ontology to project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Create labels and upload them to project as ground truths" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create labels\nlabels = []\nfor global_key in global_keys:\n labels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n # Create radio classification annotation for labels\n lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n )\n ],\n ))\n\n# Upload labels for the data rows in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job.wait_until_done()\n\nprint(f\"Errors: {upload_job.errors}\")", + "cell_type": "code", "outputs": [], - "source": [ - "# Create labels\n", - "labels = []\n", - "for global_key in global_keys:\n", - " labels.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " # Create radio classification annotation for labels\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\")),\n", - " )\n", - " ],\n", - " ))\n", - "\n", - "# Upload labels for the data rows in project\n", - "upload_job = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"label_import_job\" + str(uuid.uuid4()),\n", - " labels=labels,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "\n", - "print(f\"Errors: {upload_job.errors}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, 
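`lb.LabelImport` uploads the labels as ground truth. If you instead want them to appear as editable pre-labels in the labeling queue, the commonly documented alternative is a model-assisted labeling (MAL) import. A sketch, assuming the same `labels` list built above:

```python
import uuid

import labelbox as lb

# Import the same annotations as pre-labels instead of ground truth
mal_job = lb.MALPredictionImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name="mal_import_job" + str(uuid.uuid4()),
    predictions=labels,
)
mal_job.wait_until_done()
print(f"Errors: {mal_job.errors}")
```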
"source": [ "### Move data rows in project to different task queues" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Get list of task queues for project\ntask_queues = project.task_queues()\n\nfor task_queue in task_queues:\n print(task_queue)", + "cell_type": "code", "outputs": [], - "source": [ - "# Get list of task queues for project\n", - "task_queues = project.task_queues()\n", - "\n", - "for task_queue in task_queues:\n", - " print(task_queue)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.move_data_rows_to_task_queue(\n data_row_ids=lb.GlobalKeys(global_keys), # Provide a list of global keys\n task_queue_id=task_queues[2].\n uid, # Passing None moves data rows to \"Done\" task queue\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.move_data_rows_to_task_queue(\n", - " data_row_ids=lb.GlobalKeys(global_keys), # Provide a list of global keys\n", - " task_queue_id=task_queues[2].\n", - " uid, # Passing None moves data rows to \"Done\" task queue\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Fetch project configuration" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Note the project is not fully setup many of the fields will be empty.\nprint(\"Project is not setup yet:\", project.setup_complete is None)\nprint(\"Project name:\", project.name)\nprint(\"Project description:\", project.description)\nprint(\"Media Type:\", project.media_type)\nbatches = [b for b in project.batches()]\nprint(\"Project Batches\", batches)\nprint(\"Ontology:\", project.ontology())", + "cell_type": "code", "outputs": [], - "source": [ - "# Note the project is not fully setup many of the fields will be empty.\n", - "print(\"Project is not setup yet:\", project.setup_complete is None)\n", - "print(\"Project name:\", project.name)\n", - "print(\"Project description:\", project.description)\n", - "print(\"Media Type:\", project.media_type)\n", - "batches = [b for b in project.batches()]\n", - "print(\"Project Batches\", batches)\n", - "print(\"Ontology:\", project.ontology())" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Return number of labeled data rows" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "print(\"Number of labels:\", project.get_label_count())", + "cell_type": "code", "outputs": [], - "source": [ - "print(\"Number of labels:\", project.get_label_count())" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Get project overview" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Returns only the number of data rows and issues\noverview = project.get_overview()\n\n# Returns the number of data rows, issues and the details of the in_review queue\ndetailed_overview = project.get_overview(details=True)", + "cell_type": "code", "outputs": [], - "source": [ - "# Returns only the number of data rows and issues\n", - "overview = project.get_overview()\n", - "\n", - "# Returns the number of data rows, issues and the details of the in_review queue\n", - "detailed_overview = project.get_overview(details=True)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, 
"source": [ "### Duplicate a project\n", "Please see the section [Duplicate a project](https://docs.labelbox.com/docs/create-a-project#duplicate-a-project) to have the scope of the method." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "destination_project = project.clone()", + "cell_type": "code", "outputs": [], - "source": [ - "destination_project = project.clone()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Copy labels and data rows from one project to a different project\n", "In the below steps we will be copying data rows with their corresponding labels from one project to a different project with a similar ontology. First, we must set up a new project with a ontology that matches the tooling of our source project ontology." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create an empty destination project\ndestination_project = client.create_project(\n name=\"destination-test-project\",\n description=\"a description\",\n media_type=lb.MediaType.Image,\n)\n\n# Create ontology and attach to destination project\ndestination_ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"destination_radio_question\",\n options=[\n lb.Option(value=\"destination_first_radio_answer\"),\n lb.Option(value=\"destination_second_radio_answer\"),\n ],\n ),\n ])\n\ndestination_ontology = client.create_ontology(\"dest-test-ontology\",\n ontology_builder.asdict())\n\ndestination_project.setup_editor(destination_ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create an empty destination project\n", - "destination_project = client.create_project(\n", - " name=\"destination-test-project\",\n", - " description=\"a description\",\n", - " media_type=lb.MediaType.Image,\n", - ")\n", - "\n", - "# Create ontology and attach to destination project\n", - "destination_ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"destination_radio_question\",\n", - " options=[\n", - " lb.Option(value=\"destination_first_radio_answer\"),\n", - " lb.Option(value=\"destination_second_radio_answer\"),\n", - " ],\n", - " ),\n", - " ])\n", - "\n", - "destination_ontology = client.create_ontology(\"dest-test-ontology\",\n", - " ontology_builder.asdict())\n", - "\n", - "destination_project.setup_editor(destination_ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Copy data rows and labels\n", @@ -499,100 +343,37 @@ " * ConflictResolutionStrategy.OverrideWithAnnotations\n", "* `param batch_priority`\n", " - The priority of the batch." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Get ontology dictionary to obtain featureSchemaIds\nsource_ontology_normalized = ontology.normalized\ndestination_ontology_normalized = destination_ontology.normalized\n\nANNOTATION_ONTOLOGY_MAPPING = {\n source_ontology_normalized[\"classifications\"][0][\"featureSchemaId\"]:\n destination_ontology_normalized[\"classifications\"][0]\n [\"featureSchemaId\"], # Classification featureSchemaID\n source_ontology_normalized[\"classifications\"][0][\"options\"][0][\"featureSchemaId\"]:\n destination_ontology_normalized[\"classifications\"][0][\"options\"][0]\n [\"featureSchemaId\"], # Different Classification Answer featureSchemaIDs\n source_ontology_normalized[\"classifications\"][0][\"options\"][1][\"featureSchemaId\"]:\n destination_ontology_normalized[\"classifications\"][0][\"options\"][1]\n [\"featureSchemaId\"],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Get ontology dictionary to obtain featureSchemaIds\n", - "source_ontology_normalized = ontology.normalized\n", - "destination_ontology_normalized = destination_ontology.normalized\n", - "\n", - "ANNOTATION_ONTOLOGY_MAPPING = {\n", - " source_ontology_normalized[\"classifications\"][0][\"featureSchemaId\"]:\n", - " destination_ontology_normalized[\"classifications\"][0]\n", - " [\"featureSchemaId\"], # Classification featureSchemaID\n", - " source_ontology_normalized[\"classifications\"][0][\"options\"][0][\"featureSchemaId\"]:\n", - " destination_ontology_normalized[\"classifications\"][0][\"options\"][0]\n", - " [\"featureSchemaId\"], # Different Classification Answer featureSchemaIDs\n", - " source_ontology_normalized[\"classifications\"][0][\"options\"][1][\"featureSchemaId\"]:\n", - " destination_ontology_normalized[\"classifications\"][0][\"options\"][1]\n", - " [\"featureSchemaId\"],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "send_to_annotate_params = {\n \"source_project_id\":\n project.uid,\n \"annotations_ontology_mapping\":\n ANNOTATION_ONTOLOGY_MAPPING,\n \"exclude_data_rows_in_project\":\n False,\n \"override_existing_annotations_rule\":\n ConflictResolutionStrategy.OverrideWithPredictions,\n \"batch_priority\":\n 5,\n}\n\n# Get task id to workflow you want to send data rows. 
If sent to initial labeling queue, labels will be pre-labels.\nqueue_id = [\n queue.uid\n for queue in destination_project.task_queues()\n if queue.queue_type == \"MANUAL_REVIEW_QUEUE\"\n][0]\n\ntask = client.send_to_annotate_from_catalog(\n destination_project_id=destination_project.uid,\n task_queue_id=\n queue_id, # ID of workflow task, set ID to None if you want to send data rows with labels to the Done queue.\n batch_name=\"Prediction Import Demo Batch\",\n data_rows=lb.GlobalKeys(\n global_keys # Provide a list of global keys from source project\n ),\n params=send_to_annotate_params,\n)\n\ntask.wait_till_done()\n\nprint(f\"Errors: {task.errors}\")", + "cell_type": "code", "outputs": [], - "source": [ - "send_to_annotate_params = {\n", - " \"source_project_id\":\n", - " project.uid,\n", - " \"annotations_ontology_mapping\":\n", - " ANNOTATION_ONTOLOGY_MAPPING,\n", - " \"exclude_data_rows_in_project\":\n", - " False,\n", - " \"override_existing_annotations_rule\":\n", - " ConflictResolutionStrategy.OverrideWithPredictions,\n", - " \"batch_priority\":\n", - " 5,\n", - "}\n", - "\n", - "# Get task id to workflow you want to send data rows. If sent to initial labeling queue, labels will be pre-labels.\n", - "queue_id = [\n", - " queue.uid\n", - " for queue in destination_project.task_queues()\n", - " if queue.queue_type == \"MANUAL_REVIEW_QUEUE\"\n", - "][0]\n", - "\n", - "task = client.send_to_annotate_from_catalog(\n", - " destination_project_id=destination_project.uid,\n", - " task_queue_id=\n", - " queue_id, # ID of workflow task, set ID to None if you want to send data rows with labels to the Done queue.\n", - " batch_name=\"Prediction Import Demo Batch\",\n", - " data_rows=lb.GlobalKeys(\n", - " global_keys # Provide a list of global keys from source project\n", - " ),\n", - " params=send_to_annotate_params,\n", - ")\n", - "\n", - "task.wait_till_done()\n", - "\n", - "print(f\"Errors: {task.errors}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Clean up\n", "Uncomment and run the cell below to optionally delete Labelbox objects created." 
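To make the optional teardown harder to trigger accidentally, the deletes can also be gated behind a flag instead of comment markers. A minimal sketch, assuming the objects created in this demo:

```python
CLEANUP = False  # flip to True to delete the demo objects

if CLEANUP:
    project.delete()
    destination_project.delete()
    dataset.delete()
    client.delete_unused_ontology(destination_ontology.uid)
    client.delete_unused_ontology(ontology.uid)
```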
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# destination_project.delete()\n# dataset.delete()\n# client.delete_unused_ontology(destination_ontology.uid)\n# client.delete_unused_ontology(ontology.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# destination_project.delete()\n", - "# dataset.delete()\n", - "# client.delete_unused_ontology(destination_ontology.uid)\n", - "# client.delete_unused_ontology(ontology.uid)" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/examples/exports/composite_mask_export.ipynb b/examples/exports/composite_mask_export.ipynb index 679f82624..0a1cbfd45 100644 --- a/examples/exports/composite_mask_export.ipynb +++ b/examples/exports/composite_mask_export.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 2, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Export composite masks \n", @@ -34,66 +36,53 @@ "Composite masks are a combination of mask instances grouped in a single mask URL. \n", "\n", "The purpose of this demo is to demonstrate how to transition from exporting single masks to exporting composite masks. " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport urllib.request\nfrom PIL import Image\nimport json", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import urllib.request\n", - "from PIL import Image\n", - "import json" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## API Key and Client\n", "See the developer guide for [creating an API key](https://docs.labelbox.com/reference/create-api-key)." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)\nclient.enable_experimental = (\n True ## This is required if using the export() streamable method\n)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)\n", - "client.enable_experimental = (\n", - " True ## This is required if using the export() streamable method\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Key differences between single mask instance and composite mask." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Composite masks\n", @@ -128,31 +117,17 @@ " }\n", "```\n", "3. A unique RGB color is assigned to each mask instance. 
The example below shows a composite mask of a label, and while it contains all mask instances, only the RGB color associated with this particular annotation will be filled in under the ```color_rgb``` field." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Example on how to fetch a composite mask\n# The mask here shows all the mask instances associated with a label\ntask_id = \"\"\ncomposite_mask_id = \"\"\n\nmask_url = f\"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}/index/1\"\nreq = urllib.request.Request(mask_url, headers=client.headers)\nimage = Image.open(urllib.request.urlopen(req))\nw, h = image.size\nnew_w = w // 4\nnew_h = h // 4\n\nimage.resize((new_w, new_h), Image.BICUBIC)", + "cell_type": "code", "outputs": [], - "source": [ - "# Example on how to fetch a composite mask\n", - "# The mask here shows all the mask instances associated with a label\n", - "task_id = \"\"\n", - "composite_mask_id = \"\"\n", - "\n", - "mask_url = f\"https://api.labelbox.com/api/v1/tasks/{task_id}/masks/{composite_mask_id}/index/1\"\n", - "req = urllib.request.Request(mask_url, headers=client.headers)\n", - "image = Image.open(urllib.request.urlopen(req))\n", - "w, h = image.size\n", - "new_w = w // 4\n", - "new_h = h // 4\n", - "\n", - "image.resize((new_w, new_h), Image.BICUBIC)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Here's an example of an entry featuring a composite mask (see image above) containing the mask instance's RGB color uniquely associated with the annotation.\n", @@ -176,17 +151,17 @@ " }\n", "```\n", "- rgb(123,103,152) = Purple\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "---" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Single mask instance:\n", @@ -202,193 +177,79 @@ "\n", "```\n", "3. 
RGB color is not present" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Create an export from an Image project with mask annotations\n", "To better showcase composite masks, make sure you have different mask tools and mask annotations in your project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Insert the project ID of the project from which you wish to export data rows.\nPROJECT_ID = \"\"\nproject = client.get_project(PROJECT_ID)", + "cell_type": "code", "outputs": [], - "source": [ - "# Insert the project ID of the project from which you wish to export data rows.\n", - "PROJECT_ID = \"\"\n", - "project = client.get_project(PROJECT_ID)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "export_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n}\n\nfilters = {}\n\n# export() is the streamable option of exports V2, for more information please visit our documentation:\n# https://docs.labelbox.com/reference/label-export#export-v2-methods\n\nexport_task = project.export(params=export_params, filters=filters)\nexport_task.wait_till_done()\n\nif export_task.has_result():\n print(export_task.result)", + "cell_type": "code", "outputs": [], - "source": [ - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - "}\n", - "\n", - "filters = {}\n", - "\n", - "# export() is the streamable option of exports V2, for more information please visit our documentation:\n", - "# https://docs.labelbox.com/reference/label-export#export-v2-methods\n", - "\n", - "export_task = project.export(params=export_params, filters=filters)\n", - "export_task.wait_till_done()\n", - "\n", - "if export_task.has_result():\n", - " print(export_task.result)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Get all the ```color_rgb``` associated with annotations that are using a specific mask tool " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "stream = export_task.get_buffered_stream()\n\nmask_tool_rgb_mapping = {}\n\nfor output in stream:\n # Parse the JSON string from the output\n output_json = output.json\n\n # Get the labels for the specified project ID or an empty list if the project ID is not found\n project_labels = (output_json[\"projects\"].get(PROJECT_ID,\n {}).get(\"labels\", []))\n\n # Iterate through each label\n for label in project_labels:\n # Get the list of annotations (objects) for the label\n annotations = label[\"annotations\"].get(\"objects\", [])\n\n # Iterate through each annotation\n for annotation in annotations:\n # Check if the annotation is of type \"ImageSegmentationMask\"\n if annotation.get(\"annotation_kind\") == \"ImageSegmentationMask\":\n # Add the color RGB information to the mapping dictionary\n mask_tool_rgb_mapping.setdefault(annotation[\"name\"], []).append(\n annotation[\"composite_mask\"][\"color_rgb\"])\n\nprint(mask_tool_rgb_mapping)", + "cell_type": "code", "outputs": [], - "source": [ - "stream = 
export_task.get_buffered_stream()\n", - "\n", - "mask_tool_rgb_mapping = {}\n", - "\n", - "for output in stream:\n", - " # Parse the JSON string from the output\n", - " output_json = output.json\n", - "\n", - " # Get the labels for the specified project ID or an empty list if the project ID is not found\n", - " project_labels = (output_json[\"projects\"].get(PROJECT_ID,\n", - " {}).get(\"labels\", []))\n", - "\n", - " # Iterate through each label\n", - " for label in project_labels:\n", - " # Get the list of annotations (objects) for the label\n", - " annotations = label[\"annotations\"].get(\"objects\", [])\n", - "\n", - " # Iterate through each annotation\n", - " for annotation in annotations:\n", - " # Check if the annotation is of type \"ImageSegmentationMask\"\n", - " if annotation.get(\"annotation_kind\") == \"ImageSegmentationMask\":\n", - " # Add the color RGB information to the mapping dictionary\n", - " mask_tool_rgb_mapping.setdefault(annotation[\"name\"], []).append(\n", - " annotation[\"composite_mask\"][\"color_rgb\"])\n", - "\n", - "print(mask_tool_rgb_mapping)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Create an export from a Video project with mask annotations " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "VIDEO_PROJECT_ID = \"\"\nproject_video = client.get_project(VIDEO_PROJECT_ID)", + "cell_type": "code", "outputs": [], - "source": [ - "VIDEO_PROJECT_ID = \"\"\n", - "project_video = client.get_project(VIDEO_PROJECT_ID)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "export_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n}\n\nfilters = {}\n\n# export() is the streamable option of exports V2, for more information please visit our documentation:\n# https://docs.labelbox.com/reference/label-export#export-v2-methods\n\nexport_task_video = project_video.export(params=export_params, filters=filters)\nexport_task_video.wait_till_done()\n\nif export_task_video.has_result():\n print(export_task_video.result)", + "cell_type": "code", "outputs": [], - "source": [ - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - "}\n", - "\n", - "filters = {}\n", - "\n", - "# export() is the streamable option of exports V2, for more information please visit our documentation:\n", - "# https://docs.labelbox.com/reference/label-export#export-v2-methods\n", - "\n", - "export_task_video = project_video.export(params=export_params, filters=filters)\n", - "export_task_video.wait_till_done()\n", - "\n", - "if export_task_video.has_result():\n", - " print(export_task_video.result)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Get all the ```color_rgb``` associated with annotations that are using a specific mask tool from each frame" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "tools_frames_color = {}\nstream = export_task_video.get_buffered_stream()\n\n# Iterate over each output in the stream\nfor output in 
stream:\n output_json = output.json\n\n # Iterate over the labels in the specific project\n for dr in output_json[\"projects\"][VIDEO_PROJECT_ID][\"labels\"]:\n frames_data = dr[\"annotations\"][\"frames\"]\n\n # Iterate over each frame in the frames data\n for frame_key, frame_value in frames_data.items():\n\n # Iterate over each annotation in the frame\n for annotation_key, annotation_value in frame_value.items():\n if \"objects\" in annotation_key and annotation_value.values():\n\n # Iterate over each object in the annotation\n for object_key, object_value in annotation_value.items():\n if (object_value[\"annotation_kind\"] ==\n \"VideoSegmentationMask\"):\n # Update tools_frames_color with object information\n tools_frames_color.setdefault(\n object_value[\"name\"], []).append({\n frame_key:\n object_value[\"composite_mask\"]\n [\"color_rgb\"]\n })\n\nprint(tools_frames_color)", + "cell_type": "code", "outputs": [], - "source": [ - "tools_frames_color = {}\n", - "stream = export_task_video.get_buffered_stream()\n", - "\n", - "# Iterate over each output in the stream\n", - "for output in stream:\n", - " output_json = output.json\n", - "\n", - " # Iterate over the labels in the specific project\n", - " for dr in output_json[\"projects\"][VIDEO_PROJECT_ID][\"labels\"]:\n", - " frames_data = dr[\"annotations\"][\"frames\"]\n", - "\n", - " # Iterate over each frame in the frames data\n", - " for frame_key, frame_value in frames_data.items():\n", - "\n", - " # Iterate over each annotation in the frame\n", - " for annotation_key, annotation_value in frame_value.items():\n", - " if \"objects\" in annotation_key and annotation_value.values():\n", - "\n", - " # Iterate over each object in the annotation\n", - " for object_key, object_value in annotation_value.items():\n", - " if (object_value[\"annotation_kind\"] ==\n", - " \"VideoSegmentationMask\"):\n", - " # Update tools_frames_color with object information\n", - " tools_frames_color.setdefault(\n", - " object_value[\"name\"], []).append({\n", - " frame_key:\n", - " object_value[\"composite_mask\"]\n", - " [\"color_rgb\"]\n", - " })\n", - "\n", - "print(tools_frames_color)" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + ] +} \ No newline at end of file diff --git a/examples/model_experiments/custom_metrics_demo.ipynb b/examples/model_experiments/custom_metrics_demo.ipynb index 670073141..28a63c011 100644 --- a/examples/model_experiments/custom_metrics_demo.ipynb +++ b/examples/model_experiments/custom_metrics_demo.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Model Diagnostics - Custom Metrics Demo\n", @@ -34,1535 +36,243 @@ " * Iterate faster\n", " * Measure and report on model quality\n", " * Understand marginal value of additional labels and modeling efforts\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Set up" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q --upgrade \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ 
- "%pip install -q --upgrade \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import uuid\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "import requests\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## API key and client\n", "Provide a valid API key below to connect to the Labelbox client properly. For more information, please review the [Create API key](https://docs.labelbox.com/reference/create-api-key) guide." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = None\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = None\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Classifications" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Radio (single-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "radio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.1\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n )),\n)\n\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.1\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.1\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "# NDJSON\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " 
\"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.1\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Checklist (multi-choice)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n lb_types.ClassificationAnswer(\n name=\"second_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n ]),\n)\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n {\n \"name\":\n \"second_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " 
],\n", - " ),\n", - " lb_types.ClassificationAnswer(\n", - " name=\"second_checklist_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " ),\n", - " ]),\n", - ")\n", - "checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"checklist_question\",\n", - " \"answer\": [\n", - " {\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - " {\n", - " \"name\":\n", - " \"second_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Nested radio and checklist" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "nested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n )),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\":\n \"nested_radio_question\",\n \"confidence\":\n 0.5,\n \"answer\": 
{\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n }],\n}\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"confidence\":\n 0.5,\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\":\n \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n },\n }],\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": 
\"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332,\n", - " },\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_radio_question\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - " }],\n", - "}\n", - "\n", - "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " 
classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332,\n", - " },\n", - " ],\n", - " )\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332,\n", - " },\n", - " ],\n", - " },\n", - " }],\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding Box" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "bbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 
43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"bbox\": {\n \"top\": 977,\n \"left\": 1690,\n \"height\": 330,\n \"width\": 225\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "bbox_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", - " end=lb_types.Point(x=1915,\n", - " y=1307), # x= left + width , y = top + height\n", - " ),\n", - ")\n", - "\n", - "bbox_prediction_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"confidence\": 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"bbox\": {\n", - " \"top\": 977,\n", - " \"left\": 1690,\n", - " \"height\": 330,\n", - " \"width\": 225\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Bounding box with nested classification " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332,\n },\n ],\n )),\n )\n ],\n)\n## NDJSON\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 
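Note that the two bounding-box encodings above differ: `lb_types.Rectangle` takes `start`/`end` corner points, while the NDJSON `bbox` uses `top`/`left`/`height`/`width`. A small converter (a hypothetical helper, offered only as an illustration) makes the relationship explicit:

```python
import labelbox.types as lb_types

# Hypothetical helper: convert an NDJSON-style bbox dict into the
# corner-point Rectangle used by the Python annotation types.
def bbox_to_rectangle(bbox):
    return lb_types.Rectangle(
        start=lb_types.Point(x=bbox["left"], y=bbox["top"]),
        end=lb_types.Point(x=bbox["left"] + bbox["width"],
                           y=bbox["top"] + bbox["height"]),
    )

# The bbox from the prediction above yields the same corners,
# (1690, 977) and (1915, 1307), as the Rectangle written by hand.
rect = bbox_to_rectangle({"top": 977, "left": 1690, "height": 330, "width": 225})
```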
0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.2\n },\n {\n \"name\": \"precision\",\n \"value\": 0.1\n },\n {\n \"name\": \"recall\",\n \"value\": 0.3\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 23\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n },\n }],\n \"bbox\": {\n \"top\": 933,\n \"left\": 541,\n \"height\": 191,\n \"width\": 330\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.2\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.1\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.3\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 23\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", - " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.2\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.1\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.3\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 23\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332,\n", - " },\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - ")\n", - "## NDJSON\n", - "bbox_with_radio_subclass_prediction_ndjson = {\n", - " \"name\": \"bbox_with_radio_subclass\",\n", - " \"confidence\": 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.2\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.1\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.3\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 23\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.2\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.1\n", - " 
},\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.3\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 23\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " },\n", - " }],\n", - " \"bbox\": {\n", - " \"top\": 933,\n", - " \"left\": 541,\n", - " \"height\": 191,\n", - " \"width\": 330\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Polygon" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Anotation\npolygon_prediction = lb_types.ObjectAnnotation(\n name=\"polygon\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\npolygon_prediction_ndjson = {\n \"name\":\n \"polygon\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"polygon\": [\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n {\n \"x\": 2278.306,\n \"y\": 256.885\n },\n {\n \"x\": 2428.197,\n \"y\": 200.437\n },\n {\n \"x\": 2560.0,\n \"y\": 335.419\n },\n {\n \"x\": 2557.386,\n \"y\": 503.165\n },\n {\n \"x\": 2320.596,\n \"y\": 503.103\n },\n {\n \"x\": 2156.083,\n \"y\": 628.943\n },\n {\n \"x\": 2161.111,\n \"y\": 785.519\n },\n {\n \"x\": 2002.115,\n \"y\": 894.647\n },\n {\n \"x\": 1838.456,\n \"y\": 877.874\n },\n {\n \"x\": 1436.53,\n \"y\": 874.636\n },\n {\n \"x\": 1411.403,\n \"y\": 758.579\n },\n {\n \"x\": 1353.853,\n \"y\": 751.74\n },\n {\n \"x\": 1345.264,\n \"y\": 453.461\n },\n {\n \"x\": 1426.011,\n \"y\": 421.129\n },\n {\n \"x\": 1489.581,\n \"y\": 183.934\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Anotation\n", - "polygon_prediction = lb_types.ObjectAnnotation(\n", - " name=\"polygon\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": 
\"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " value=lb_types.Polygon(points=[\n", - " lb_types.Point(x=1489.581, y=183.934),\n", - " lb_types.Point(x=2278.306, y=256.885),\n", - " lb_types.Point(x=2428.197, y=200.437),\n", - " lb_types.Point(x=2560.0, y=335.419),\n", - " lb_types.Point(x=2557.386, y=503.165),\n", - " lb_types.Point(x=2320.596, y=503.103),\n", - " lb_types.Point(x=2156.083, y=628.943),\n", - " lb_types.Point(x=2161.111, y=785.519),\n", - " lb_types.Point(x=2002.115, y=894.647),\n", - " lb_types.Point(x=1838.456, y=877.874),\n", - " lb_types.Point(x=1436.53, y=874.636),\n", - " lb_types.Point(x=1411.403, y=758.579),\n", - " lb_types.Point(x=1353.853, y=751.74),\n", - " lb_types.Point(x=1345.264, y=453.461),\n", - " lb_types.Point(x=1426.011, y=421.129),\n", - " ]),\n", - ")\n", - "\n", - "polygon_prediction_ndjson = {\n", - " \"name\":\n", - " \"polygon\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"polygon\": [\n", - " {\n", - " \"x\": 1489.581,\n", - " \"y\": 183.934\n", - " },\n", - " {\n", - " \"x\": 2278.306,\n", - " \"y\": 256.885\n", - " },\n", - " {\n", - " \"x\": 2428.197,\n", - " \"y\": 200.437\n", - " },\n", - " {\n", - " \"x\": 2560.0,\n", - " \"y\": 335.419\n", - " },\n", - " {\n", - " \"x\": 2557.386,\n", - " \"y\": 503.165\n", - " },\n", - " {\n", - " \"x\": 2320.596,\n", - " \"y\": 503.103\n", - " },\n", - " {\n", - " \"x\": 2156.083,\n", - " \"y\": 628.943\n", - " },\n", - " {\n", - " \"x\": 2161.111,\n", - " \"y\": 785.519\n", - " },\n", - " {\n", - " \"x\": 2002.115,\n", - " \"y\": 894.647\n", - " },\n", - " {\n", - " \"x\": 1838.456,\n", - " \"y\": 877.874\n", - " },\n", - " {\n", - " \"x\": 1436.53,\n", - " \"y\": 874.636\n", - " },\n", - " {\n", - " \"x\": 1411.403,\n", - " \"y\": 758.579\n", - " },\n", - " {\n", - " \"x\": 1353.853,\n", - " \"y\": 751.74\n", - " },\n", - " {\n", - " \"x\": 1345.264,\n", - " \"y\": 453.461\n", - " },\n", - " {\n", - " \"x\": 1426.011,\n", - " \"y\": 421.129\n", - " },\n", - " {\n", - " \"x\": 1489.581,\n", - " \"y\": 183.934\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Classification: Free-form text" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python annotation\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\",\n value=lb_types.Text(\n answer=\"sample text\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n ),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n 
{\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"confidence\": 0.5,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python annotation\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\",\n", - " value=lb_types.Text(\n", - " answer=\"sample text\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "text_annotation_ndjson = {\n", - " \"name\": \"free_text\",\n", - " \"answer\": \"sample text\",\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"confidence\": 0.5,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Point" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Python Annotation\npoint_prediction = lb_types.ObjectAnnotation(\n name=\"point\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\npoint_prediction_ndjson = {\n \"name\": \"point\",\n \"confidence\": 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [],\n \"point\": {\n \"x\": 1166.606,\n \"y\": 1441.768\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# Python Annotation\n", - "point_prediction = lb_types.ObjectAnnotation(\n", - " name=\"point\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " 
\"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " value=lb_types.Point(x=1166.606, y=1441.768),\n", - ")\n", - "\n", - "point_prediction_ndjson = {\n", - " \"name\": \"point\",\n", - " \"confidence\": 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"classifications\": [],\n", - " \"point\": {\n", - " \"x\": 1166.606,\n", - " \"y\": 1441.768\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Polyline" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "polyline_prediction = lb_types.ObjectAnnotation(\n name=\"polyline\",\n confidence=0.5,\n custom_metrics=[\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)\n\npolyline_prediction_ndjson = {\n \"name\":\n \"polyline\",\n \"confidence\":\n 0.5,\n \"customMetrics\": [\n {\n \"name\": \"iou\",\n \"value\": 0.5\n },\n {\n \"name\": \"f1\",\n \"value\": 0.33\n },\n {\n \"name\": \"precision\",\n \"value\": 0.55\n },\n {\n \"name\": \"recall\",\n \"value\": 0.33\n },\n {\n \"name\": \"tagsCount\",\n \"value\": 43\n },\n {\n \"name\": \"metric_with_a_very_long_name\",\n \"value\": 0.334332\n },\n ],\n \"classifications\": [],\n \"line\": [\n {\n \"x\": 2534.353,\n \"y\": 249.471\n },\n {\n \"x\": 2429.492,\n \"y\": 182.092\n },\n {\n \"x\": 2294.322,\n \"y\": 221.962\n },\n {\n \"x\": 2224.491,\n \"y\": 180.463\n },\n {\n \"x\": 2136.123,\n \"y\": 204.716\n },\n {\n \"x\": 1712.247,\n \"y\": 173.949\n },\n {\n \"x\": 1703.838,\n \"y\": 84.438\n },\n {\n \"x\": 1579.772,\n \"y\": 82.61\n },\n {\n \"x\": 1583.442,\n \"y\": 167.552\n },\n {\n \"x\": 1478.869,\n \"y\": 164.903\n },\n {\n \"x\": 1418.941,\n \"y\": 318.149\n },\n {\n \"x\": 1243.128,\n \"y\": 400.815\n },\n {\n \"x\": 1022.067,\n \"y\": 319.007\n 
},\n {\n \"x\": 892.367,\n \"y\": 379.216\n },\n {\n \"x\": 670.273,\n \"y\": 364.408\n },\n {\n \"x\": 613.114,\n \"y\": 288.16\n },\n {\n \"x\": 377.559,\n \"y\": 238.251\n },\n {\n \"x\": 368.087,\n \"y\": 185.064\n },\n {\n \"x\": 246.557,\n \"y\": 167.286\n },\n {\n \"x\": 236.648,\n \"y\": 285.61\n },\n {\n \"x\": 90.929,\n \"y\": 326.412\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "polyline_prediction = lb_types.ObjectAnnotation(\n", - " name=\"polyline\",\n", - " confidence=0.5,\n", - " custom_metrics=[\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " value=lb_types.Line(points=[\n", - " lb_types.Point(x=2534.353, y=249.471),\n", - " lb_types.Point(x=2429.492, y=182.092),\n", - " lb_types.Point(x=2294.322, y=221.962),\n", - " lb_types.Point(x=2224.491, y=180.463),\n", - " lb_types.Point(x=2136.123, y=204.716),\n", - " lb_types.Point(x=1712.247, y=173.949),\n", - " lb_types.Point(x=1703.838, y=84.438),\n", - " lb_types.Point(x=1579.772, y=82.61),\n", - " lb_types.Point(x=1583.442, y=167.552),\n", - " lb_types.Point(x=1478.869, y=164.903),\n", - " lb_types.Point(x=1418.941, y=318.149),\n", - " lb_types.Point(x=1243.128, y=400.815),\n", - " lb_types.Point(x=1022.067, y=319.007),\n", - " lb_types.Point(x=892.367, y=379.216),\n", - " lb_types.Point(x=670.273, y=364.408),\n", - " lb_types.Point(x=613.114, y=288.16),\n", - " lb_types.Point(x=377.559, y=238.251),\n", - " lb_types.Point(x=368.087, y=185.064),\n", - " lb_types.Point(x=246.557, y=167.286),\n", - " lb_types.Point(x=236.648, y=285.61),\n", - " lb_types.Point(x=90.929, y=326.412),\n", - " ]),\n", - ")\n", - "\n", - "polyline_prediction_ndjson = {\n", - " \"name\":\n", - " \"polyline\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"customMetrics\": [\n", - " {\n", - " \"name\": \"iou\",\n", - " \"value\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"f1\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"precision\",\n", - " \"value\": 0.55\n", - " },\n", - " {\n", - " \"name\": \"recall\",\n", - " \"value\": 0.33\n", - " },\n", - " {\n", - " \"name\": \"tagsCount\",\n", - " \"value\": 43\n", - " },\n", - " {\n", - " \"name\": \"metric_with_a_very_long_name\",\n", - " \"value\": 0.334332\n", - " },\n", - " ],\n", - " \"classifications\": [],\n", - " \"line\": [\n", - " {\n", - " \"x\": 2534.353,\n", - " \"y\": 249.471\n", - " },\n", - " {\n", - " \"x\": 2429.492,\n", - " \"y\": 182.092\n", - " },\n", - " {\n", - " \"x\": 2294.322,\n", - " \"y\": 221.962\n", - " },\n", - " {\n", - " \"x\": 2224.491,\n", - " \"y\": 180.463\n", - " },\n", - " {\n", - " \"x\": 2136.123,\n", - " \"y\": 204.716\n", - " },\n", - " {\n", - " \"x\": 1712.247,\n", - " \"y\": 173.949\n", - " },\n", - " {\n", - " \"x\": 1703.838,\n", - " \"y\": 84.438\n", - " },\n", - " {\n", - " \"x\": 1579.772,\n", - " \"y\": 82.61\n", - " },\n", - " {\n", - " \"x\": 1583.442,\n", - " \"y\": 167.552\n", - " },\n", - " {\n", - " \"x\": 1478.869,\n", - " \"y\": 164.903\n", - " },\n", - " {\n", - " \"x\": 1418.941,\n", - " \"y\": 318.149\n", - " },\n", - " {\n", - " \"x\": 1243.128,\n", - " \"y\": 
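Coordinate-heavy geometries like the polyline above are tedious to write out by hand. A minimal sketch (the helper is hypothetical, not SDK code) that builds the same `lb_types` objects from plain `(x, y)` tuples:

```python
import labelbox.types as lb_types

# Hypothetical helper: turn (x, y) tuples into lb_types.Point objects.
def to_points(coords):
    return [lb_types.Point(x=x, y=y) for x, y in coords]

# First three vertices of the polyline prediction above.
line = lb_types.Line(points=to_points([
    (2534.353, 249.471),
    (2429.492, 182.092),
    (2294.322, 221.962),
]))
```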
400.815\n", - " },\n", - " {\n", - " \"x\": 1022.067,\n", - " \"y\": 319.007\n", - " },\n", - " {\n", - " \"x\": 892.367,\n", - " \"y\": 379.216\n", - " },\n", - " {\n", - " \"x\": 670.273,\n", - " \"y\": 364.408\n", - " },\n", - " {\n", - " \"x\": 613.114,\n", - " \"y\": 288.16\n", - " },\n", - " {\n", - " \"x\": 377.559,\n", - " \"y\": 238.251\n", - " },\n", - " {\n", - " \"x\": 368.087,\n", - " \"y\": 185.064\n", - " },\n", - " {\n", - " \"x\": 246.557,\n", - " \"y\": 167.286\n", - " },\n", - " {\n", - " \"x\": 236.648,\n", - " \"y\": 285.61\n", - " },\n", - " {\n", - " \"x\": 90.929,\n", - " \"y\": 326.412\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# send a sample image as batch to the project\nglobal_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\" + str(uuid.uuid4())\ntest_img_urls = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"Custom metrics demo\",\n iam_integration=None)\ntask = dataset.create_data_rows([test_img_urls])\n\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# send a sample image as batch to the project\n", - "global_key = \"2560px-Kitano_Street_Kobe01s5s4110.jpeg\" + str(uuid.uuid4())\n", - "test_img_urls = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"Custom metrics demo\",\n", - " iam_integration=None)\n", - "task = dataset.create_data_rows([test_img_urls])\n", - "\n", - "print(f\"Failed data rows: {task.failed_data_rows}\")\n", - "print(f\"Errors: {task.errors}\")\n", - "\n", - "if task.errors:\n", - " for error in task.errors:\n", - " if (\"Duplicate global key\" in error[\"message\"] and\n", - " dataset.row_count == 0):\n", - " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", - " print(f\"Deleting empty dataset: {dataset}\")\n", - " dataset.delete()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n 
lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of tools\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"first_sub_radio_answer\")],\n ),\n ],\n ),\n lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n ],\n)\n\nontology = client.create_ontology(\n \"Image Prediction Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\"),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " tools=[ # List of tools\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " 
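Since predictions are matched to ontology features by name, a quick check that every prediction name exists in the ontology can catch typos before upload. A hedged sketch, assuming the `ontology_builder` defined above (the check itself is not part of the original notebook):

```python
# Collect the top-level feature names defined in the ontology above.
ontology_names = {c.name for c in ontology_builder.classifications}
ontology_names |= {t.name for t in ontology_builder.tools}

# Names this demo's predictions rely on; flag anything missing.
expected = {
    "radio_question", "checklist_question", "free_text",
    "nested_radio_question", "nested_checklist_question",
    "bounding_box", "bbox_with_radio_subclass",
    "polygon", "point", "polyline",
}
missing = expected - ontology_names
if missing:
    print(f"Missing from the ontology: {missing}")
```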
name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(value=\"first_sub_radio_answer\")],\n", - " ),\n", - " ],\n", - " ),\n", - " lb.Tool(tool=lb.Tool.Type.POLYGON, name=\"polygon\"),\n", - " lb.Tool(tool=lb.Tool.Type.POINT, name=\"point\"),\n", - " lb.Tool(tool=lb.Tool.Type.LINE, name=\"polyline\"),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Image Prediction Import Demo\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Image,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# create Model\nmodel = client.create_model(\n name=\"model_with_aggregated_custom_metrics\" + str(uuid.uuid4()),\n ontology_id=ontology.uid,\n)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create Model\n", - "model = client.create_model(\n", - " name=\"model_with_aggregated_custom_metrics\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid,\n", - ")\n", - "# create Model Run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5. 
Create the predictions payload\n", @@ -1570,376 +280,150 @@ "Create the prediction payload using the snippets of code in the ***Supported Predictions*** section.\n", "\n", "The resulting `label_prediction_ndjson` should have exactly the same content for predictions that are supported by both (with the exception of the UUID strings that are generated)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label for predictions\nlabel_prediction = []\nlabel_prediction.append(\n    lb_types.Label(\n        data=lb_types.ImageData(global_key=global_key),\n        annotations=[\n            radio_prediction,\n            nested_radio_prediction,\n            checklist_prediction,\n            nested_checklist_prediction,\n            bbox_prediction,\n            bbox_with_radio_subclass_prediction,\n            polyline_prediction,\n            polygon_prediction,\n            point_prediction,\n            text_annotation,\n        ],\n    ))", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label for predictions\n", - "label_prediction = []\n", - "label_prediction.append(\n", - "    lb_types.Label(\n", - "        data=lb_types.ImageData(global_key=global_key),\n", - "        annotations=[\n", - "            radio_prediction,\n", - "            nested_radio_prediction,\n", - "            checklist_prediction,\n", - "            nested_checklist_prediction,\n", - "            bbox_prediction,\n", - "            bbox_with_radio_subclass_prediction,\n", - "            polyline_prediction,\n", - "            polygon_prediction,\n", - "            point_prediction,\n", - "            text_annotation,\n", - "        ],\n", - "    ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_prediction_ndjson = []\n\nfor annot in [\n    radio_prediction_ndjson,\n    checklist_prediction_ndjson,\n    bbox_prediction_ndjson,\n    bbox_with_radio_subclass_prediction_ndjson,\n    polygon_prediction_ndjson,\n    point_prediction_ndjson,\n    polyline_prediction_ndjson,\n    text_annotation_ndjson,\n    nested_radio_prediction_ndjson,\n    nested_checklist_prediction_ndjson,\n]:\n    annot.update({\"dataRow\": {\"globalKey\": global_key}})\n    label_prediction_ndjson.append(annot)", + "cell_type": "code", "outputs": [], - "source": [ - "label_prediction_ndjson = []\n", - "\n", - "for annot in [\n", - "    radio_prediction_ndjson,\n", - "    checklist_prediction_ndjson,\n", - "    bbox_prediction_ndjson,\n", - "    bbox_with_radio_subclass_prediction_ndjson,\n", - "    polygon_prediction_ndjson,\n", - "    point_prediction_ndjson,\n", - "    polyline_prediction_ndjson,\n", - "    text_annotation_ndjson,\n", - "    nested_radio_prediction_ndjson,\n", - "    nested_checklist_prediction_ndjson,\n", - "]:\n", - "    annot.update({\"dataRow\": {\"globalKey\": global_key}})\n", - "    label_prediction_ndjson.append(annot)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6. 
Upload the predictions payload to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_prediction,\n)\n\n# Errors will appear for prediction uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_prediction,\n", - ")\n", - "\n", - "# Errors will appear for prediction uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to a model run\n", "To visualize both annotations and predictions in the model run we will create a project with ground truth annotations.\n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.1. Create a labelbox project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"image_prediction_many_kinds\",\n media_type=lb.MediaType.Image)\nproject.setup_editor(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Labelbox project\n", - "project = client.create_project(name=\"image_prediction_many_kinds\",\n", - " media_type=lb.MediaType.Image)\n", - "project.setup_editor(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.2. 
Create a batch to send to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"batch_predictions_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"batch_predictions_demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.3 Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########### Annotations ###########\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977), # x = left, y = top\n end=lb_types.Point(x=1915,\n y=1307), # x= left + width , y = top + height\n ),\n)\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\", confidence=0.5)),\n )\n ],\n)\n\npolygon_annotation = lb_types.ObjectAnnotation(\n name=\"polygon\",\n value=lb_types.Polygon(points=[\n lb_types.Point(x=1489.581, y=183.934),\n lb_types.Point(x=2278.306, y=256.885),\n lb_types.Point(x=2428.197, y=200.437),\n lb_types.Point(x=2560.0, y=335.419),\n lb_types.Point(x=2557.386, y=503.165),\n lb_types.Point(x=2320.596, y=503.103),\n lb_types.Point(x=2156.083, y=628.943),\n lb_types.Point(x=2161.111, y=785.519),\n 
lb_types.Point(x=2002.115, y=894.647),\n lb_types.Point(x=1838.456, y=877.874),\n lb_types.Point(x=1436.53, y=874.636),\n lb_types.Point(x=1411.403, y=758.579),\n lb_types.Point(x=1353.853, y=751.74),\n lb_types.Point(x=1345.264, y=453.461),\n lb_types.Point(x=1426.011, y=421.129),\n ]),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\npoint_annotation = lb_types.ObjectAnnotation(\n name=\"point\",\n value=lb_types.Point(x=1166.606, y=1441.768),\n)\n\npolyline_annotation = lb_types.ObjectAnnotation(\n name=\"polyline\",\n value=lb_types.Line(points=[\n lb_types.Point(x=2534.353, y=249.471),\n lb_types.Point(x=2429.492, y=182.092),\n lb_types.Point(x=2294.322, y=221.962),\n lb_types.Point(x=2224.491, y=180.463),\n lb_types.Point(x=2136.123, y=204.716),\n lb_types.Point(x=1712.247, y=173.949),\n lb_types.Point(x=1703.838, y=84.438),\n lb_types.Point(x=1579.772, y=82.61),\n lb_types.Point(x=1583.442, y=167.552),\n lb_types.Point(x=1478.869, y=164.903),\n lb_types.Point(x=1418.941, y=318.149),\n lb_types.Point(x=1243.128, y=400.815),\n lb_types.Point(x=1022.067, y=319.007),\n lb_types.Point(x=892.367, y=379.216),\n lb_types.Point(x=670.273, y=364.408),\n lb_types.Point(x=613.114, y=288.16),\n lb_types.Point(x=377.559, y=238.251),\n lb_types.Point(x=368.087, y=185.064),\n lb_types.Point(x=246.557, y=167.286),\n lb_types.Point(x=236.648, y=285.61),\n lb_types.Point(x=90.929, y=326.412),\n ]),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "########### Annotations ###########\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\")),\n", - ")\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=1690, y=977), # x = left, y = top\n", - " end=lb_types.Point(x=1915,\n", - " y=1307), # x= left + width , y = top + height\n", - " ),\n", - ")\n", - "\n", - "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", - " 
name=\"bbox_with_radio_subclass\",\n", - " value=lb_types.Rectangle(\n", - " start=lb_types.Point(x=541, y=933), # x = left, y = top\n", - " end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\", confidence=0.5)),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "polygon_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polygon\",\n", - " value=lb_types.Polygon(points=[\n", - " lb_types.Point(x=1489.581, y=183.934),\n", - " lb_types.Point(x=2278.306, y=256.885),\n", - " lb_types.Point(x=2428.197, y=200.437),\n", - " lb_types.Point(x=2560.0, y=335.419),\n", - " lb_types.Point(x=2557.386, y=503.165),\n", - " lb_types.Point(x=2320.596, y=503.103),\n", - " lb_types.Point(x=2156.083, y=628.943),\n", - " lb_types.Point(x=2161.111, y=785.519),\n", - " lb_types.Point(x=2002.115, y=894.647),\n", - " lb_types.Point(x=1838.456, y=877.874),\n", - " lb_types.Point(x=1436.53, y=874.636),\n", - " lb_types.Point(x=1411.403, y=758.579),\n", - " lb_types.Point(x=1353.853, y=751.74),\n", - " lb_types.Point(x=1345.264, y=453.461),\n", - " lb_types.Point(x=1426.011, y=421.129),\n", - " ]),\n", - ")\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", - "\n", - "point_annotation = lb_types.ObjectAnnotation(\n", - " name=\"point\",\n", - " value=lb_types.Point(x=1166.606, y=1441.768),\n", - ")\n", - "\n", - "polyline_annotation = lb_types.ObjectAnnotation(\n", - " name=\"polyline\",\n", - " value=lb_types.Line(points=[\n", - " lb_types.Point(x=2534.353, y=249.471),\n", - " lb_types.Point(x=2429.492, y=182.092),\n", - " lb_types.Point(x=2294.322, y=221.962),\n", - " lb_types.Point(x=2224.491, y=180.463),\n", - " lb_types.Point(x=2136.123, y=204.716),\n", - " lb_types.Point(x=1712.247, y=173.949),\n", - " lb_types.Point(x=1703.838, y=84.438),\n", - " lb_types.Point(x=1579.772, y=82.61),\n", - " lb_types.Point(x=1583.442, y=167.552),\n", - " lb_types.Point(x=1478.869, y=164.903),\n", - " lb_types.Point(x=1418.941, y=318.149),\n", - " lb_types.Point(x=1243.128, y=400.815),\n", - " lb_types.Point(x=1022.067, y=319.007),\n", - " lb_types.Point(x=892.367, y=379.216),\n", - " lb_types.Point(x=670.273, y=364.408),\n", - " lb_types.Point(x=613.114, y=288.16),\n", - " lb_types.Point(x=377.559, y=238.251),\n", - " lb_types.Point(x=368.087, y=185.064),\n", - " lb_types.Point(x=246.557, y=167.286),\n", - " lb_types.Point(x=236.648, y=285.61),\n", - " lb_types.Point(x=90.929, y=326.412),\n", - " ]),\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.4. 
Create the label object" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\nlabel = []\nannotations = [\n radio_annotation,\n nested_radio_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n polygon_annotation,\n point_annotation,\n polyline_annotation,\n]\nlabel.append(\n lb_types.Label(data=lb_types.ImageData(global_key=global_key),\n annotations=annotations))", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n", - "label = []\n", - "annotations = [\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " checklist_annotation,\n", - " nested_checklist_annotation,\n", - " text_annotation,\n", - " bbox_annotation,\n", - " bbox_with_radio_subclass_annotation,\n", - " polygon_annotation,\n", - " point_annotation,\n", - " polyline_annotation,\n", - "]\n", - "label.append(\n", - " lb_types.Label(data=lb_types.ImageData(global_key=global_key),\n", - " annotations=annotations))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.5. Upload annotations to the project using Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"annotation_import_\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_annotation = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"annotation_import_\" + str(uuid.uuid4()),\n", - " labels=label,\n", - ")\n", - "\n", - "upload_job_annotation.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_annotation.errors)\n", - "print(\"Status of uploads: \", upload_job_annotation.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### 7.6 Send the annotations to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_labels(project_id=project.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_labels(project_id=project.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Optional deletions for cleanup\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/examples/model_experiments/model_slices.ipynb b/examples/model_experiments/model_slices.ipynb index 
546f0aedf..91575a43e 100644 --- a/examples/model_experiments/model_slices.ipynb +++ b/examples/model_experiments/model_slices.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 2, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Model Slices\n", @@ -35,192 +37,136 @@ "This notebook is used to go over some common Labelbox SDK methods to interact with Model Slices created through the Labelbox platform.\n", "\n", "See [Slices](https://docs.labelbox.com/docs/slices-1) for more information on modifying Model Slices." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Set up" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q --upgrade \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q --upgrade \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": 1, "metadata": {}, + "source": "import labelbox as lb\nimport uuid", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import uuid" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## API key and client\n", "Provide a valid API key below to connect to the Labelbox client properly. For more information, please review the [Create API key](https://docs.labelbox.com/reference/create-api-key) guide." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = None\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = None\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Create Model Slice\n", "\n", "In order to interact with model slices, you must create a Model Experiment with a Model Run and then create a Model Slice through the platform. The steps below go over this process. See [Model](https://docs.labelbox.com/reference/model) from our developer guides for more information." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Creating Model Experiment\n", "\n", "To create a Model Experiment you will need to create an ontology. 
See [Ontology](https://docs.labelbox.com/reference/ontology) for more information." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Ontology" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "classification_features = [\n    lb.Classification(\n        class_type=lb.Classification.Type.CHECKLIST,\n        name=\"Quality Issues\",\n        options=[\n            lb.Option(value=\"blurry\", label=\"Blurry\"),\n            lb.Option(value=\"distorted\", label=\"Distorted\"),\n        ],\n    )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n                                      classifications=classification_features)\n\nontology = client.create_ontology(\n    \"Ontology from new features\",\n    ontology_builder.asdict(),\n    media_type=lb.MediaType.Image,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "classification_features = [\n", - "    lb.Classification(\n", - "        class_type=lb.Classification.Type.CHECKLIST,\n", - "        name=\"Quality Issues\",\n", - "        options=[\n", - "            lb.Option(value=\"blurry\", label=\"Blurry\"),\n", - "            lb.Option(value=\"distorted\", label=\"Distorted\"),\n", - "        ],\n", - "    )\n", - "]\n", - "\n", - "ontology_builder = lb.OntologyBuilder(tools=[],\n", - "                                      classifications=classification_features)\n", - "\n", - "ontology = client.create_ontology(\n", - "    \"Ontology from new features\",\n", - "    ontology_builder.asdict(),\n", - "    media_type=lb.MediaType.Image,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Experiment" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model = client.create_model(name=f\"Model Slice Demo {str(uuid.uuid4())}\",\n                            ontology_id=ontology.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "model = client.create_model(name=f\"Model Slice Demo {str(uuid.uuid4())}\", ontology_id=ontology.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Creating a Model Run from Model Experiment\n", "\n", "In this step, we create a dataset and attach its data rows to our model run. See [Dataset](https://docs.labelbox.com/reference/dataset) for more information."
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Dataset and Data Rows" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# send a sample image as data row for a dataset\nglobal_key = str(uuid.uuid4())\n\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"foundry-demo-dataset\")\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\n\nprint(f\"Errors: {task.errors}\")\nprint(f\"Failed data rows: {task.failed_data_rows}\")", + "cell_type": "code", "outputs": [], - "source": [ - "# send a sample image as data row for a dataset\n", - "global_key = str(uuid.uuid4())\n", - "\n", - "test_img_url = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"foundry-demo-dataset\")\n", - "task = dataset.create_data_rows([test_img_url])\n", - "task.wait_till_done()\n", - "\n", - "print(f\"Errors: {task.errors}\")\n", - "print(f\"Failed data rows: {task.failed_data_rows}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Run and Attach Data Rows" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run_name = \"Model Slice Demo\"\nexample_config = {\n \"learning_rate\": 0.001,\n \"batch_size\": 32,\n}\nmodel_run = model.create_model_run(name=model_run_name, config=example_config)", + "cell_type": "code", "outputs": [], - "source": [ - "model_run_name = \"Model Slice Demo\"\n", - "example_config = {\n", - " \"learning_rate\": 0.001,\n", - " \"batch_size\": 32,\n", - "}\n", - "model_run = model.create_model_run(name=model_run_name, config=example_config)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Make Model Slice Inside UI\n", @@ -236,139 +182,86 @@ "5. Give the slice a name and select ***Save***.\n", "6. Above the ***Search your data*** dropdown you will see your slice's name. Select that dropdown and click ***Copy slice ID***.\n", "7. Paste the ***Slice ID*** below." 
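As an aside on the Model Run created above: the `example_config` hyperparameters can be read back or extended later from the SDK. The sketch below is illustrative only and is not part of this notebook's flow; it assumes your installed SDK version exposes `ModelRun.get_config()` and `ModelRun.update_config()`.

```python
# Illustrative sketch (not part of the notebook flow): inspect and extend the
# hyperparameter config attached to the model run above.
# Assumes the installed labelbox SDK exposes get_config()/update_config().
current_config = model_run.get_config()
print(current_config)  # e.g. {"learning_rate": 0.001, "batch_size": 32}

# update_config merges the given keys into the existing config
model_run.update_config({"epochs": 10})
```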
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "SLICE_ID = \"\"", + "cell_type": "code", "outputs": [], - "source": [ - "SLICE_ID = \"\"" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Get Model Slice" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_slice = client.get_model_slice(SLICE_ID)", + "cell_type": "code", "outputs": [], - "source": [ - "model_slice = client.get_model_slice(SLICE_ID)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Obtain Data Row IDs from Model Slice" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "data_row_ids = model_slice.get_data_row_ids(model_run.uid)\n\nfor data_row_id in data_row_ids:\n print(data_row_id)", + "cell_type": "code", "outputs": [], - "source": [ - "data_row_ids = model_slice.get_data_row_ids(model_run.uid)\n", - "\n", - "for data_row_id in data_row_ids:\n", - " print(data_row_id)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Obtain Data Row Identifiers Objects" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "data_rows = model_slice.get_data_row_identifiers(model_run.uid)\n\nfor data_row in data_rows:\n print(data_row)", + "cell_type": "code", "outputs": [], - "source": [ - "data_rows = model_slice.get_data_row_identifiers(model_run.uid)\n", - "\n", - "for data_row in data_rows:\n", - " print(data_row)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Model Slice Attributes" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# name (str)\nmodel_slice.name\n\n# description (str)\nmodel_slice.description\n\n# updated at (datetime)\nmodel_slice.updated_at\n\n# created at (datetime)\nmodel_slice.created_at\n\n# filter (list[dict])\nmodel_slice.filter", + "cell_type": "code", "outputs": [], - "source": [ - "# name (str)\n", - "model_slice.name\n", - "\n", - "# description (str)\n", - "model_slice.description\n", - "\n", - "# updated at (datetime)\n", - "model_slice.updated_at\n", - "\n", - "# created at (datetime)\n", - "model_slice.created_at\n", - "\n", - "# filter (list[dict])\n", - "model_slice.filter" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Clean up\n", "Uncomment and run the cell below to optionally delete Labelbox objects created." 
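Before cleaning up, a slice is typically consumed downstream, for example by sending its data rows back to a labeling project as a batch. A minimal sketch, assuming an existing `project` (this notebook does not create one) and a non-empty slice:

```python
# Minimal sketch: send every data row in the slice to a labeling project.
# `project` is assumed to exist already; this notebook does not create one.
data_row_ids = list(model_slice.get_data_row_ids(model_run.uid))
if data_row_ids:
    project.create_batch(
        "slice_batch_" + str(uuid.uuid4()),  # batch names must be unique per project
        data_rows=data_row_ids,  # data row IDs; global_keys=... also works
        priority=5,
    )
```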
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# model_run.delete()\n# model.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# model_run.delete()\n", - "# model.delete()\n", - "# dataset.delete()" - ] + "execution_count": null } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + ] +} \ No newline at end of file diff --git a/examples/prediction_upload/pdf_predictions.ipynb b/examples/prediction_upload/pdf_predictions.ipynb index 962e35704..b50d0c3cc 100644 --- a/examples/prediction_upload/pdf_predictions.ipynb +++ b/examples/prediction_upload/pdf_predictions.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 2, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,17 +24,17 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Prediction Import " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "*Annotation types*\n", @@ -51,419 +53,115 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import uuid\nimport json\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "import json\n", - "import requests\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API key" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Entity ##########\n\n# Annotation Types\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n confidence=0.5,\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_prediction_ndjson = {\n \"name\":\n \"named_entity\",\n \"confidence\":\n 0.5,\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n 
\"groupId\": \"\",\n \"page\": 1,\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Entity ##########\n", - "\n", - "# Annotation Types\n", - "entities_prediction = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " confidence=0.5,\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "# NDJSON\n", - "entities_prediction_ndjson = {\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\",],\n", - " \"groupId\": \"\",\n", - " \"page\": 1,\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########### Radio Classification #########\n\n# Annotation types\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\", confidence=0.5)),\n)\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########### Radio Classification #########\n", - "\n", - "# Annotation types\n", - "radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\", confidence=0.5)),\n", - ")\n", - "# NDJSON\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Checklist Classification ###########\n", - "\n", - "# Annotation types\n", - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"checklist_question\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ 
Bounding Box ###########\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim_1[\"left\"],\n y=bbox_dim_1[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n ), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": bbox_dim_1,\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Bounding Box ###########\n", - "\n", - "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", - "bbox_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\", # must match your ontology feature\"s name\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", - " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", - " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " page=0,\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " ),\n", - ")\n", - "\n", - "bbox_prediction_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"bbox\": bbox_dim_1,\n", - " \"page\": 0,\n", - " \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# ############ global nested classifications ###########\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n },\n }],\n }],\n}\n\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": 
\"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# ############ global nested classifications ###########\n", - "\n", - "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=\n", - " 0.5, # Confidence scores should be added to the answer\n", - " )\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " },\n", - " }],\n", - " }],\n", - "}\n", - "\n", - "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=\n", - " 0.5, # Confidence scores should be added to the answer\n", - " )),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_prediction_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " }],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############## Classification Free-form text ##############\n\ntext_prediction = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n)\n\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############## Classification Free-form text ##############\n", - "\n", - "text_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature\"s name\n", - " value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n", - ")\n", - "\n", - "text_prediction_ndjson = {\n", - " \"name\": \"free_text\",\n", - " \"answer\": \"sample text\",\n", - " \"confidence\": 0.5,\n", - "}" - ] + "execution_count": 
null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### BBOX with nested classifications #########\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n}\n\nbbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\",\n confidence=0.5,\n )),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\",\n \"confidence\": 0.5,\n },\n }],\n },\n }],\n \"bbox\": bbox_dim,\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### BBOX with nested classifications #########\n", - "\n", - "bbox_dim = {\n", - " \"top\": 226.757,\n", - " \"left\": 317.271,\n", - " \"height\": 194.229,\n", - " \"width\": 249.386,\n", - "}\n", - "\n", - "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " confidence=0.5,\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim[\"left\"],\n", - " y=bbox_dim[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n", - " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=0.5,\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"second_sub_radio_question\",\n", - " value=lb_types.Radio(\n", - " answer=lb_types.ClassificationAnswer(\n", - " name=\"second_sub_radio_answer\",\n", - " confidence=0.5,\n", - " )),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "bbox_with_radio_subclass_prediction_ndjson = {\n", - " \"name\": \"bbox_with_radio_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"second_sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"second_sub_radio_answer\",\n", - " \"confidence\": 0.5,\n", - " },\n", - " }],\n", - " },\n", - " }],\n", - " \"bbox\": bbox_dim,\n", - " 
\"page\": 1,\n", - "    \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ NER with nested classifications ########\n\nner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n    name=\"ner_with_checklist_subclass\",\n    confidence=0.5,\n    value=lb_types.DocumentEntity(\n        name=\"ner_with_checklist_subclass\",\n        text_selections=[\n            lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n        ],\n    ),\n    classifications=[\n        lb_types.ClassificationAnnotation(\n            name=\"sub_checklist_question\",\n            value=lb_types.Checklist(answer=[\n                lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n                                              confidence=0.5)\n            ]),\n        )\n    ],\n)\n\nner_with_checklist_subclass_prediction_ndjson = {\n    \"name\":\n        \"ner_with_checklist_subclass\",\n    \"classifications\": [{\n        \"name\": \"sub_checklist_question\",\n        \"answer\": [{\n            \"name\": \"first_sub_checklist_answer\",\n            \"confidence\": 0.5\n        }],\n    }],\n    \"textSelections\": [{\n        \"tokenIds\": [\"\"],\n        \"groupId\": \"\",\n        \"page\": 1\n    }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ NER with nested classifications ########\n", - "\n", - "ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n", - "    name=\"ner_with_checklist_subclass\",\n", - "    confidence=0.5,\n", - "    value=lb_types.DocumentEntity(\n", - "        name=\"ner_with_checklist_subclass\",\n", - "        text_selections=[\n", - "            lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - "        ],\n", - "    ),\n", - "    classifications=[\n", - "        lb_types.ClassificationAnnotation(\n", - "            name=\"sub_checklist_question\",\n", - "            value=lb_types.Checklist(answer=[\n", - "                lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n", - "                                              confidence=0.5)\n", - "            ]),\n", - "        )\n", - "    ],\n", - ")\n", - "\n", - "ner_with_checklist_subclass_prediction_ndjson = {\n", - "    \"name\":\n", - "        \"ner_with_checklist_subclass\",\n", - "    \"classifications\": [{\n", - "        \"name\": \"sub_checklist_question\",\n", - "        \"answer\": [{\n", - "            \"name\": \"first_sub_checklist_answer\",\n", - "            \"confidence\": 0.5\n", - "        }],\n", - "    }],\n", - "    \"textSelections\": [{\n", - "        \"tokenIds\": [\"\"],\n", - "        \"groupId\": \"\",\n", - "        \"page\": 1\n", - "    }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Passing a `text_layer_url` is no longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", @@ -477,200 +175,60 @@ "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. 
As a result, all tokens in the text layer are also rotated by 90 degrees.\n", "\n", "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "global_key = \"0801.3483.pdf\" + str(uuid.uuid4())\nimg_url = {\n    \"row_data\": {\n        \"pdf_url\":\n            \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n    },\n    \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n    for error in task.errors:\n        if (\"Duplicate global key\" in error[\"message\"] and\n                dataset.row_count == 0):\n            # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n            print(f\"Deleting empty dataset: {dataset}\")\n            dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "global_key = \"0801.3483.pdf\" + str(uuid.uuid4())\n", - "img_url = {\n", - "    \"row_data\": {\n", - "        \"pdf_url\":\n", - "            \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", - "    },\n", - "    \"global_key\": global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", - "task = dataset.create_data_rows([img_url])\n", - "task.wait_till_done()\n", - "print(f\"Failed data rows: {task.failed_data_rows}\")\n", - "print(f\"Errors: {task.errors}\")\n", - "\n", - "if task.errors:\n", - "    for error in task.errors:\n", - "        if (\"Duplicate global key\" in error[\"message\"] and\n", - "                dataset.row_count == 0):\n", - "            # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", - "            print(f\"Deleting empty dataset: {dataset}\")\n", - "            dataset.delete()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched."
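Since mismatched feature names are a common cause of failed imports, it can help to cross-check prediction names against the ontology before uploading. A minimal sketch, using the `ontology_builder` defined in the next cell and an illustrative list of the predictions built earlier (it only covers top-level names, not nested sub-classifications):

```python
# Illustrative sanity check: every prediction's top-level name should match
# a tool or classification in the ontology. Run after building ontology_builder.
sample_predictions = [entities_prediction, radio_prediction, bbox_prediction]
known_names = {tool.name for tool in ontology_builder.tools} | {
    c.name for c in ontology_builder.classifications
}
for pred in sample_predictions:
    if pred.name not in known_names:
        print(f"'{pred.name}' has no matching top-level feature in the ontology")
```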
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "## Setup the ontology and link the tools created above.\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", 
- " class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " tools=[ # List of Tool objects\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", - " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.NER,\n", - " name=\"ner_with_checklist_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_sub_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"second_sub_radio_question\",\n", - " options=[\n", - " lb.Option(\"second_sub_radio_answer\")\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Document Annotation Import Demo\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Document,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# create Model\nmodel = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create Model\n", - "model = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid)\n", - "# create Model Run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + 
"execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the predictions payload\n", @@ -679,508 +237,184 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting payload should have exactly the same content for annotations that are supported by both" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "To extract the generated text layer url we first need to export the data row" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "client.enable_experimental = True\ntask = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_buffered_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = output.json\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)", + "cell_type": "code", "outputs": [], - "source": [ - "client.enable_experimental = True\n", - "task = lb.DataRow.export(client=client, global_keys=[global_key])\n", - "task.wait_till_done()\n", - "stream = task.get_buffered_stream()\n", - "\n", - "text_layer = \"\"\n", - "for output in stream:\n", - " output_json = output.json\n", - " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n", - "print(text_layer)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n \"tokenIds\": list_tokens,\n \"page\": page\n }]\n })\n\n\n# Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n \"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n\n# re-write the entity annotation with text selections\nentities_prediction_document_entity = lb_types.DocumentEntity(\n name=\"named_entity\", confidence=0.5, textSelections=text_selections)\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\", value=entities_prediction_document_entity)\n\n# re-write the entity annotation + subclassification with text selections\nclassifications = [\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n confidence=0.5)\n ]),\n )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n textSelections=text_selections_ner,\n)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n value=ner_annotation_with_subclass,\n classifications=classifications,\n)\n\n# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\nprint(f\"entities_annotation={entities_prediction}\")\nprint(\n f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")", + "cell_type": "code", "outputs": [], - "source": [ - "# Helper method\n", - "def update_text_selections(annotation, group_id, list_tokens, page):\n", - " return annotation.update({\n", - " \"textSelections\": [{\n", - " \"groupId\": group_id,\n", - " \"tokenIds\": list_tokens,\n", - " \"page\": page\n", - " }]\n", - " })\n", - "\n", - "\n", - "# Fetch the content of the text layer\n", - "res = requests.get(text_layer)\n", - "\n", - "# Phrases that we want to annotation obtained from the text layer url\n", - "content_phrases = [\n", - " \"Metal-insulator (MI) transitions have been one of the\",\n", - " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", - "]\n", - "\n", - "# Parse the text layer\n", - "text_selections = []\n", - "text_selections_ner = []\n", - "\n", - "for obj in json.loads(res.text):\n", - " for group in obj[\"groups\"]:\n", - " if group[\"content\"] == content_phrases[0]:\n", - " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " document_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", - " text_selections.append(document_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=entities_prediction_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[1]:\n", - " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " ner_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", - " text_selections_ner.append(ner_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=ner_with_checklist_subclass_prediction_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens_2, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - "\n", - "# re-write the entity annotation with text selections\n", - "entities_prediction_document_entity = lb_types.DocumentEntity(\n", - " name=\"named_entity\", confidence=0.5, textSelections=text_selections)\n", - "entities_prediction = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\", value=entities_prediction_document_entity)\n", - "\n", - "# re-write the entity annotation + subclassification with text selections\n", - "classifications = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n", - " confidence=0.5)\n", - " ]),\n", - " )\n", - "]\n", - "ner_annotation_with_subclass = lb_types.DocumentEntity(\n", - " name=\"ner_with_checklist_subclass\",\n", - " confidence=0.5,\n", - " textSelections=text_selections_ner,\n", - ")\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " confidence=0.5,\n", - " value=ner_annotation_with_subclass,\n", - " classifications=classifications,\n", - ")\n", - "\n", - "# Final NDJSON and python annotations\n", - "print(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\n", - "print(f\"entities_annotation={entities_prediction}\")\n", - "print(\n", - " f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n", - ")\n", - "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Python annotation \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_predictions = []\n\nlabel_predictions.append(\n lb_types.Label(\n data=lb_types.DocumentData(global_key=global_key),\n annotations=[\n entities_prediction,\n checklist_prediction,\n 
nested_checklist_prediction,\n text_prediction,\n radio_prediction,\n nested_radio_prediction,\n bbox_prediction,\n bbox_with_radio_subclass_prediction,\n ner_with_checklist_subclass_prediction,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label_predictions = []\n", - "\n", - "label_predictions.append(\n", - " lb_types.Label(\n", - " data=lb_types.DocumentData(global_key=global_key),\n", - " annotations=[\n", - " entities_prediction,\n", - " checklist_prediction,\n", - " nested_checklist_prediction,\n", - " text_prediction,\n", - " radio_prediction,\n", - " nested_radio_prediction,\n", - " bbox_prediction,\n", - " bbox_with_radio_subclass_prediction,\n", - " ner_with_checklist_subclass_prediction,\n", - " ],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON: " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_predictions_ndjson = []\nfor annot in [\n entities_prediction_ndjson,\n checklist_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n text_prediction_ndjson,\n radio_prediction_ndjson,\n nested_radio_prediction_ndjson,\n bbox_prediction_ndjson,\n bbox_with_radio_subclass_prediction_ndjson,\n ner_with_checklist_subclass_prediction_ndjson,\n]:\n annot.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_predictions_ndjson.append(annot)", + "cell_type": "code", "outputs": [], - "source": [ - "label_predictions_ndjson = []\n", - "for annot in [\n", - " entities_prediction_ndjson,\n", - " checklist_prediction_ndjson,\n", - " nested_checklist_prediction_ndjson,\n", - " text_prediction_ndjson,\n", - " radio_prediction_ndjson,\n", - " nested_radio_prediction_ndjson,\n", - " bbox_prediction_ndjson,\n", - " bbox_with_radio_subclass_prediction_ndjson,\n", - " ner_with_checklist_subclass_prediction_ndjson,\n", - "]:\n", - " annot.update({\n", - " \"dataRow\": {\n", - " \"globalKey\": global_key\n", - " },\n", - " })\n", - " label_predictions_ndjson.append(annot)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6: Upload the predictions payload to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_predictions,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_predictions,\n", - ")\n", - "\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run\n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." 
-   ]
+   ],
+   "cell_type": "markdown"
  },
  {
-   "cell_type": "markdown",
    "metadata": {},
    "source": [
     "7.1 Create a Labelbox project\n"
-   ]
+   ],
+   "cell_type": "markdown"
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
+   "source": "project = client.create_project(name=\"Document Prediction Import Demo\",\n                                media_type=lb.MediaType.Document)\nproject.setup_editor(ontology)",
+   "cell_type": "code",
    "outputs": [],
-   "source": [
-    "project = client.create_project(name=\"Document Prediction Import Demo\",\n",
-    "                                media_type=lb.MediaType.Document)\n",
-    "project.setup_editor(ontology)"
-   ]
+   "execution_count": null
  },
  {
-   "cell_type": "markdown",
    "metadata": {},
    "source": [
     "7.2 Create a batch to send to the project"
-   ]
+   ],
+   "cell_type": "markdown"
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
+   "source": "project.create_batch(\n    \"batch_text_prediction_demo\",  # Each batch in a project must have a unique name\n    global_keys=[\n        global_key\n    ],  # Paginated collection of data row objects, list of data row IDs, or global keys\n    priority=5,  # priority between 1 (highest) and 5 (lowest)\n)",
+   "cell_type": "code",
    "outputs": [],
-   "source": [
-    "project.create_batch(\n",
-    "    \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n",
-    "    global_keys=[\n",
-    "        global_key\n",
-    "    ], # Paginated collection of data row objects, list of data row ids or global keys\n",
-    "    priority=5, # priority between 1(Highest) - 5(lowest)\n",
-    ")"
-   ]
+   "execution_count": null
  },
  {
-   "cell_type": "markdown",
    "metadata": {},
    "source": [
     "7.3 Create the annotations payload"
-   ]
+   ],
+   "cell_type": "markdown"
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
+   "source": "entities_annotation = lb_types.ObjectAnnotation(\n    name=\"named_entity\",\n    value=lb_types.DocumentEntity(name=\"named_entity\",\n                                  textSelections=text_selections),\n)\n\nradio_annotation = lb_types.ClassificationAnnotation(\n    name=\"radio_question\",\n    value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n        name=\"first_radio_answer\")),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n    name=\"checklist_question\",\n    value=lb_types.Checklist(answer=[\n        lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n        lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n    ]),\n)\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_annotation = lb_types.ObjectAnnotation(\n    name=\"bounding_box\",  # must match your ontology feature's name\n    value=lb_types.DocumentRectangle(\n        start=lb_types.Point(x=bbox_dim_1[\"left\"],\n                             y=bbox_dim_1[\"top\"]),  # x = left, y = top\n        end=lb_types.Point(\n            x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n            y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n        ),  # x = left + width, y = top + height\n        page=0,\n        unit=lb_types.RectangleUnit.POINTS,\n    ),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n    name=\"nested_checklist_question\",\n    value=lb_types.Checklist(answer=[\n        lb_types.ClassificationAnswer(\n            name=\"first_checklist_answer\",\n            classifications=[\n                lb_types.ClassificationAnnotation(\n                    name=\"sub_checklist_question\",\n                    value=lb_types.Checklist(answer=[\n                        lb_types.ClassificationAnswer(\n                            name=\"first_sub_checklist_answer\",)\n                    ]),\n                )\n            ],\n        )\n    ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n    name=\"nested_radio_question\",\n    value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n        
name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",)),\n )\n ],\n )),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n}\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n text_selections=text_selections_ner),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)", + "cell_type": "code", "outputs": [], - "source": [ - "entities_annotation = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(name=\"named_entity\",\n", - " textSelections=text_selections),\n", - ")\n", - "\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - ")\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\", # must match your ontology feature\"s name\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", - " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", - " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " page=0,\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " ),\n", - ")\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " 
value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",)\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",)),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", - "\n", - "bbox_dim = {\n", - " \"top\": 226.757,\n", - " \"left\": 317.271,\n", - " \"height\": 194.229,\n", - " \"width\": 249.386,\n", - "}\n", - "\n", - "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim[\"left\"],\n", - " y=bbox_dim[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n", - " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"second_sub_radio_question\",\n", - " value=lb_types.Radio(\n", - " answer=lb_types.ClassificationAnswer(\n", - " name=\"second_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n", - " text_selections=text_selections_ner),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.4 Create the label object " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "labels = []\n\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n radio_annotation,\n nested_radio_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n ner_with_checklist_subclass_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "labels = []\n", - "\n", - "labels.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " entities_annotation,\n", - " checklist_annotation,\n", - " nested_checklist_annotation,\n", - " text_annotation,\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " bbox_annotation,\n", - " bbox_with_radio_subclass_annotation,\n", - " 
ner_with_checklist_subclass_annotation,\n",
-    "        ],\n",
-    "    ))"
-   ]
+   "execution_count": null
  },
  {
-   "cell_type": "markdown",
    "metadata": {},
    "source": [
     "7.5 Upload annotations to the project using Label import\n"
-   ]
+   ],
+   "cell_type": "markdown"
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
+   "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n    client=client,\n    project_id=project.uid,\n    name=\"text_label_import_job\" + str(uuid.uuid4()),\n    labels=labels,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)",
+   "cell_type": "code",
    "outputs": [],
-   "source": [
-    "upload_job_annotation = lb.LabelImport.create_from_objects(\n",
-    "    client=client,\n",
-    "    project_id=project.uid,\n",
-    "    name=\"text_label_import_job\" + str(uuid.uuid4()),\n",
-    "    labels=labels,\n",
-    ")\n",
-    "\n",
-    "upload_job_annotation.wait_until_done()\n",
-    "# Errors will appear for annotation uploads that failed.\n",
-    "print(\"Errors:\", upload_job_annotation.errors)\n",
-    "print(\"Status of uploads: \", upload_job_annotation.statuses)"
-   ]
+   "execution_count": null
  },
  {
-   "cell_type": "markdown",
    "metadata": {},
    "source": [
     "7.6 Send the annotations to the Model Run"
-   ]
+   ],
+   "cell_type": "markdown"
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
+   "source": "# Get the label IDs from the project\nmodel_run.upsert_labels(project_id=project.uid)",
+   "cell_type": "code",
    "outputs": [],
-   "source": [
-    "# get the labels id from the project\n",
-    "model_run.upsert_labels(project_id=project.uid)"
-   ]
+   "execution_count": null
  },
  {
-   "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## Optional deletions for cleanup"
-   ]
+   ],
+   "cell_type": "markdown"
  },
  {
-   "cell_type": "code",
-   "execution_count": null,
    "metadata": {},
+   "source": "# project.delete()\n# dataset.delete()",
+   "cell_type": "code",
    "outputs": [],
-   "source": [
-    "# project.delete()\n",
-    "# dataset.delete()"
-   ]
+   "execution_count": null
  }
-  ],
-  "metadata": {
-   "language_info": {
-    "name": "python"
-   }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 2
-}
+ ]
+}
\ No newline at end of file
diff --git a/examples/project_configuration/queue_management.ipynb b/examples/project_configuration/queue_management.ipynb
index a5a450764..a4125386f 100644
--- a/examples/project_configuration/queue_management.ipynb
+++ b/examples/project_configuration/queue_management.ipynb
@@ -1,381 +1,206 @@
 {
-  "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "\n",
-    " \n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    ""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Queue Management"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "* The queue is used to task labelers with specific assets\n",
-    "* We can do any of the following:\n",
-    "  * Set quality settings\n",
-    "  * Set the order of items in the queue\n",
-    "  * Set the percent of assets to review"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%pip install -q \"labelbox[data]\"\n",
-    "%pip install -q numpy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import labelbox as lb\n",
-    "from 
labelbox.schema.quality_mode import QualityMode\n", - "from uuid import uuid4\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# API Key and Client\n", - "See the developer guide for [creating an API key](https://docs.labelbox.com/reference/create-api-key)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Add your API key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Set up demo project" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create project" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e5920e0f", - "metadata": {}, - "outputs": [], - "source": [ - "# Create Labelbox project\n", - "\n", - "project = client.create_project(\n", - " name=\"batch-test-project\",\n", - " description=\"a description\",\n", - " quality_mode=QualityMode.Benchmark, # For Consensus projects use quality_mode = QualityMode.Consensus\n", - " media_type=lb.MediaType.Image,\n", - ")\n", - "\n", - "dataset = client.create_dataset(name=\"queue_dataset\")" - ] - }, - { - "cell_type": "markdown", - "id": "6b7db01c", - "metadata": {}, - "source": [ - "#### Create ontology and attach to project" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a16c2b6a", - "metadata": {}, - "outputs": [], - "source": [ - "classification_features = [\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"Quality Issues\",\n", - " options=[\n", - " lb.Option(value=\"blurry\", label=\"Blurry\"),\n", - " lb.Option(value=\"distorted\", label=\"Distorted\"),\n", - " ],\n", - " )\n", - "]\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " tools=[], classifications=classification_features\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Ontology from new features\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Image,\n", - ")\n", - "\n", - "project.setup_editor(ontology)" - ] - }, - { - "cell_type": "markdown", - "id": "12e17422", - "metadata": {}, - "source": [ - "# Add data to your dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3e92e987", - "metadata": {}, - "outputs": [], - "source": [ - "## Example image\n", - "uploads = []\n", - "global_keys = []\n", - "# Generate data rows\n", - "for i in range(1, 5):\n", - " global_key = str(uuid4())\n", - " row = {\n", - " \"row_data\": f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", - " \"global_key\": global_key,\n", - " }\n", - " global_keys.append(global_key)\n", - " uploads.append(row)\n", - "\n", - "data_rows = dataset.create_data_rows(uploads)\n", - "data_rows.wait_till_done()\n", - "print(\"Errors\", data_rows.errors)\n", - "print(\"Dataset status: \", data_rows.status)" - ] - }, - { - "cell_type": "markdown", - "id": "ab98d095", - "metadata": {}, - "source": [ - "# Attach data to your project and set data row priority" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "004955b1", - "metadata": {}, - "outputs": [], - "source": [ - "######## Create batches\n", - "\n", - "# Create the batch\n", - "\n", - "batch = project.create_batch(\n", - " \"batch-demo\", # Each batch in a project must have a unique name\n", - " global_keys=global_keys[\n", - " 0:2\n", - " ], # A list of data rows, 
data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n", - ")\n", - "\n", - "batch2 = project.create_batch(\n", - " \"batch-demo-2\", # Each batch in a project must have a unique name\n", - " # Provide a slice of the data since you can't import assets with global keys that already exist in the project.\n", - " global_keys=global_keys[\n", - " 2:4\n", - " ], # A list of data rows, data row ids or global keys\n", - " priority=1, # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n", - ")\n", - "\n", - "print(\"Batch: \", batch)\n", - "print(\"Batch2: \", batch2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7a980733", - "metadata": {}, - "outputs": [], - "source": [ - "print(\n", - " \"View the results here:\", f\"https://app.labelbox.com/projects/{project.uid}\"\n", - ")\n", - "# Click `start labeling` to see the images in order" - ] - }, - { - "cell_type": "markdown", - "id": "ee8ef753", - "metadata": {}, - "source": [ - "## Queue Order\n", - "- Add priority for each data row\n", - "- Update priority for each data row" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8deb361a", - "metadata": {}, - "outputs": [], - "source": [ - "export_task = project.export()\n", - "export_task.wait_till_done()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f6c48e8f", - "metadata": {}, - "outputs": [], - "source": [ - "# Get data rows from project\n", - "data_rows = []\n", - "\n", - "\n", - "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", - " data_row = output.json\n", - " data_rows.append(\n", - " lb.GlobalKey(data_row[\"data_row\"][\"global_key\"])\n", - " ) # Convert json data row into data row identifier object\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error)\n", - " )\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT\n", - " ).start(stream_handler=json_stream_handler)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7fde932", - "metadata": {}, - "outputs": [], - "source": [ - "# Get label parameter overrides (LPOs)\n", - "project_lpos = project.labeling_parameter_overrides()\n", - "\n", - "for lpo in project_lpos:\n", - " print(lpo)\n", - " print(\"Data row:\", lpo.data_row().uid)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e7cb187", - "metadata": {}, - "outputs": [], - "source": [ - "# Add LPOs\n", - "lpos = []\n", - "priority = 1\n", - "for data_row in data_rows:\n", - " lpos.append((data_row, priority))\n", - " priority += 1\n", - "\n", - "project.set_labeling_parameter_overrides(lpos)\n", - "\n", - "# Check results\n", - "project_lpos = list(project.labeling_parameter_overrides())\n", - "\n", - "for lpo in project_lpos:\n", - " print(lpo)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4a4bed6b", - "metadata": {}, - "outputs": [], - "source": [ - "# Update LPOs\n", - "global_keys = []\n", - "for data_row in data_rows:\n", - " global_keys.append(data_row.key)\n", - "\n", - "project.update_data_row_labeling_priority(\n", - " data_rows=lb.GlobalKeys(global_keys), priority=1\n", - ")\n", - "\n", - "# Check results\n", - "project_lpos = list(project.labeling_parameter_overrides())\n", - "\n", - "for lpo in 
project_lpos:\n", - " print(lpo)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Cleanup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, + "cells": [ + { + "metadata": {}, + "source": [ + "", + " ", + "\n" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "# Queue Management" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "* The queue is used to task labelers with specific assets\n", + "* We can do any of the following:\n", + " * Set quality settings\n", + " * Set the order of items in the queue\n", + " * Set the percent of assets to review" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"\n%pip install -q numpy", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "import labelbox as lb\nfrom labelbox.schema.quality_mode import QualityMode\nfrom uuid import uuid4\nimport json", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# API Key and Client\n", + "See the developer guide for [creating an API key](https://docs.labelbox.com/reference/create-api-key)." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# Add your API key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "### Set up demo project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": [ + "#### Create project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# Create Labelbox project\n\nproject = client.create_project(\n name=\"batch-test-project\",\n description=\"a description\",\n quality_mode=QualityMode.\n Benchmark, # For Consensus projects use quality_mode = QualityMode.Consensus\n media_type=lb.MediaType.Image,\n)\n\ndataset = client.create_dataset(name=\"queue_dataset\")", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "#### Create ontology and attach to project" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "classification_features = [\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"Quality Issues\",\n options=[\n lb.Option(value=\"blurry\", label=\"Blurry\"),\n lb.Option(value=\"distorted\", label=\"Distorted\"),\n ],\n )\n]\n\nontology_builder = lb.OntologyBuilder(tools=[],\n classifications=classification_features)\n\nontology = client.create_ontology(\n \"Ontology from new features\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)\n\nproject.setup_editor(ontology)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Add data to your dataset" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "## Example image\nuploads = []\nglobal_keys = []\n# Generate data rows\nfor i in range(1, 5):\n global_key = str(uuid4())\n row = {\n \"row_data\":\n 
f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n global_key,\n }\n global_keys.append(global_key)\n uploads.append(row)\n\ndata_rows = dataset.create_data_rows(uploads)\ndata_rows.wait_till_done()\nprint(\"Errors\", data_rows.errors)\nprint(\"Dataset status: \", data_rows.status)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Attach data to your project and set data row priority" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "######## Create batches\n\n# Create the batch\n\nbatch = project.create_batch(\n \"batch-demo\", # Each batch in a project must have a unique name\n global_keys=global_keys[\n 0:2], # A list of data rows, data row ids or global keys\n priority=\n 5, # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n)\n\nbatch2 = project.create_batch(\n \"batch-demo-2\", # Each batch in a project must have a unique name\n # Provide a slice of the data since you can't import assets with global keys that already exist in the project.\n global_keys=global_keys[\n 2:4], # A list of data rows, data row ids or global keys\n priority=\n 1, # priority between 1(Highest) - 5(lowest) 5 is the max priority that can be set\n)\n\nprint(\"Batch: \", batch)\nprint(\"Batch2: \", batch2)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "print(\"View the results here:\",\n f\"https://app.labelbox.com/projects/{project.uid}\")\n# Click `start labeling` to see the images in order", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Queue Order\n", + "- Add priority for each data row\n", + "- Update priority for each data row" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "export_task = project.export()\nexport_task.wait_till_done()", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "# Get data rows from project\ndata_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n data_row = output.json\n data_rows.append(lb.GlobalKey(data_row[\"data_row\"][\"global_key\"])\n ) # Convert json data row into data row identifier object\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "# Get label parameter overrides (LPOs)\nproject_lpos = project.labeling_parameter_overrides()\n\nfor lpo in project_lpos:\n print(lpo)\n print(\"Data row:\", lpo.data_row().uid)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "# Add LPOs\nlpos = []\npriority = 1\nfor data_row in data_rows:\n lpos.append((data_row, priority))\n priority += 1\n\nproject.set_labeling_parameter_overrides(lpos)\n\n# Check results\nproject_lpos = list(project.labeling_parameter_overrides())\n\nfor lpo in project_lpos:\n print(lpo)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": "# Update LPOs\nglobal_keys = []\nfor data_row in data_rows:\n 
global_keys.append(data_row.key)\n\nproject.update_data_row_labeling_priority(data_rows=lb.GlobalKeys(global_keys),\n priority=1)\n\n# Check results\nproject_lpos = list(project.labeling_parameter_overrides())\n\nfor lpo in project_lpos:\n print(lpo)", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "# Cleanup" + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", + "outputs": [], + "execution_count": null + } + ] +} \ No newline at end of file
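
A note on the text-layer matching used in Step 5 of the PDF prediction import above: each page object in the exported text layer contains "groups" of words, and each group carries "tokens" with stable ids. The following is a minimal, self-contained sketch of that lookup, assuming the usual `import labelbox.types as lb_types` alias; `text_layer` is a placeholder for the text_layer_url exported from the data row, and the phrase is assumed to appear on page 1.

import json

import requests
import labelbox.types as lb_types

text_layer = "https://..."  # placeholder; use the exported text_layer_url
target_phrase = "Metal-insulator (MI) transitions have been one of the"

res = requests.get(text_layer)

text_selections = []
for page in json.loads(res.text):
    for group in page["groups"]:
        if group["content"] == target_phrase:
            # Collect the group's token ids so the entity can reference
            # the exact words in the document.
            token_ids = [token["id"] for token in group["tokens"]]
            text_selections.append(
                lb_types.DocumentTextSelection(groupId=group["id"],
                                               tokenIds=token_ids,
                                               page=1))

# The selections can then back a DocumentEntity, as in the notebook:
entity = lb_types.ObjectAnnotation(
    name="named_entity",
    value=lb_types.DocumentEntity(name="named_entity",
                                  textSelections=text_selections),
)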
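
The bounding-box cells above repeat the same corner arithmetic: `start` is the top-left point, and `end` is computed as (left + width, top + height). A hypothetical convenience helper, not part of the SDK, that captures that convention:

import labelbox.types as lb_types


def document_rectangle(bbox: dict, page: int) -> lb_types.DocumentRectangle:
    # Converts a {top, left, height, width} dict in PDF points into the
    # start/end corner points expected by DocumentRectangle
    # (start = top-left corner, end = bottom-right corner).
    return lb_types.DocumentRectangle(
        start=lb_types.Point(x=bbox["left"], y=bbox["top"]),
        end=lb_types.Point(x=bbox["left"] + bbox["width"],
                           y=bbox["top"] + bbox["height"]),
        page=page,
        unit=lb_types.RectangleUnit.POINTS,
    )


bbox_dim = {"top": 135.3, "left": 102.771, "height": 109.843, "width": 415.8}
bbox_annotation = lb_types.ObjectAnnotation(
    name="bounding_box", value=document_rectangle(bbox_dim, page=0))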
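
The queue_management changes follow one flow: export the project, collect global keys from the buffered stream, then set labeling priority through `lb.GlobalKeys`. A minimal end-to-end sketch, assuming a valid `API_KEY` and an existing `PROJECT_ID` with batches attached (both placeholders):

import labelbox as lb

API_KEY = ""
PROJECT_ID = ""

client = lb.Client(api_key=API_KEY)
project = client.get_project(PROJECT_ID)

# Export the project and collect every data row's global key from the stream
export_task = project.export()
export_task.wait_till_done()

global_keys = []


def json_stream_handler(output: lb.BufferedJsonConverterOutput):
    data_row = output.json
    global_keys.append(data_row["data_row"]["global_key"])


if export_task.has_errors():
    export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(
        stream_handler=lambda error: print(error))

if export_task.has_result():
    export_task.get_buffered_stream(stream_type=lb.StreamType.RESULT).start(
        stream_handler=json_stream_handler)

# Give every exported data row the highest labeling priority (1)
project.update_data_row_labeling_priority(
    data_rows=lb.GlobalKeys(global_keys), priority=1)

# Inspect the resulting labeling parameter overrides
for lpo in project.labeling_parameter_overrides():
    print(lpo)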