From a83cd674c0952973767853e51192e733f01364ea Mon Sep 17 00:00:00 2001 From: x-eun Date: Thu, 13 Feb 2025 21:47:43 -0800 Subject: [PATCH 1/3] modify code for finding user object. remove hard code user id --- basics/user_management.ipynb | 83 +++++++++++++++--------------------- 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/basics/user_management.ipynb b/basics/user_management.ipynb index 4bb3878..444aad8 100644 --- a/basics/user_management.ipynb +++ b/basics/user_management.ipynb @@ -77,25 +77,11 @@ "organization = client.get_organization()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Please provide a dummy email here:\n", - "# Preferrably one you can access. If you have a google account you can do email+1@.com\n", - "DUMMY_EMAIL = \"SET THIS\"\n", - "# This should be set to an account that you wan't to change the permissions for.\n", - "# You could invite a new user, accept the invite and use that account if you don't want to effect any active users\n", - "DUMMY_USER_ACCOUNT_ID = \"ckneh4n8c9qvq0706uwwg5i16\"" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Roles\n", + "## Roles\n", "* When inviting a new user to an organization, there are various roles to select from.\n", "* All available roles to your org can be accessed via `client.get_roles()`" ] @@ -123,7 +109,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Create\n", + "## Create\n", "* Users are created by sending an invite\n", "* An email will be sent to them and they will be asked to join your organization" ] @@ -132,7 +118,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Organization Level Permissions\n", + "### Organization Level Permissions\n", "* Invite a new labeler with labeling permissions on all projects" ] }, @@ -152,7 +138,8 @@ "metadata": {}, "outputs": [], "source": [ - "invite = organization.invite_user(DUMMY_EMAIL, roles[\"LABELER\"])" + "USER_EMAIL = \"\"\n", + "invite = organization.invite_user(USER_EMAIL, roles[\"LABELER\"])" ] }, { @@ -170,7 +157,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Project Level Permissions\n", + "### Project Level Permissions\n", "* Invite a new labeler with labeling permissions specific to a set of projects\n", "* Here we set organization level permissions to Roles.NONE to indicate that the user only has project level permissions" ] @@ -181,10 +168,11 @@ "metadata": {}, "outputs": [], "source": [ + "USER_EMAIL = \"\"\n", "project = client.create_project(name=\"test_user_management\",\n", " media_type=lb.MediaType.Image)\n", "project_role = lb.ProjectRole(project=project, role=roles[\"REVIEWER\"])\n", - "invite = organization.invite_user(DUMMY_EMAIL,\n", + "invite = organization.invite_user(USER_EMAIL,\n", " roles[\"NONE\"],\n", " project_roles=[project_role])" ] @@ -193,7 +181,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Read\n", + "## Read\n", "* Outstanding invites cannot be queried for at this time. This information can be found in the members tab of the web app.\n", "* You are able to query for members once they have joined." ] @@ -212,7 +200,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Update\n", + "## Update\n", "* There is no update on invites. 
Instead you must delete and resend them\n",
     "* You can update User roles"
    ]
   },
@@ -223,7 +211,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "user = client._get_single(lb.User, DUMMY_USER_ACCOUNT_ID)\n",
+    "# Get all users in the organization\n",
+    "users = organization.users()\n",
+    "\n",
+    "# Filter the desired user using their email\n",
+    "USER_EMAIL = \"\"\n",
+    "user = next((u for u in users if u.email == USER_EMAIL), None)\n",
+    "\n",
+    "if user:\n",
+    "    print(f\"User found: {user.name} ({user.email})\")\n",
+    "else:\n",
+    "    raise ValueError(f\"No user found with email {USER_EMAIL}\")\n",
     "\n",
     "# Give the user organization level permissions\n",
     "user.update_org_role(roles[\"LABELER\"])\n",
@@ -236,46 +234,35 @@
     "print(user.org_role())"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Remove the user from a project (Same as setting the project role to `roles.NONE`)\n",
-    "user.remove_from_project(project)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Delete"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "* Invites can only be deleted from the ui at this time. \n",
-    "* Deleting invites can be done in the members tab of the web app."
+    "## Delete\n",
+    "You can remove users from projects and your organization using the SDK. Invites can currently only be deleted from the **Members** tab in the web app."
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "568942a5",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "* Delete the User\n",
-    "* Make sure you want to remove the user from the org:\n",
-    "* `>>> organization.remove_user(user)`"
+    "# Remove the user from a project\n",
+    "user.remove_from_project(project)\n",
+    "# Alternatively, set the user's organization role to NONE (project-level permissions only)\n",
+    "user.update_org_role(roles[\"NONE\"])\n",
+    "# Remove the user from the org\n",
+    "organization.remove_user(user)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Cleanup\n",
-    "* We created an extra project. Let's delete it"
+    "## Cleanup\n",
+    "Delete the project if you no longer need it:"
    ]
   },
   {

From 74af51ccf0bf0bb750b4f12ec8efb9f85ab41699 Mon Sep 17 00:00:00 2001
From: x-eun
Date: Thu, 13 Feb 2025 22:35:10 -0800
Subject: [PATCH 2/3] add user group examples

---
 basics/user_management.ipynb | 123 ++++++++++++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 1 deletion(-)

diff --git a/basics/user_management.ipynb b/basics/user_management.ipynb
index 444aad8..84e40be 100644
--- a/basics/user_management.ipynb
+++ b/basics/user_management.ipynb
@@ -54,7 +54,8 @@
    "outputs": [],
    "source": [
     "import labelbox as lb\n",
-    "import os"
+    "import os\n",
+    "from labelbox.schema.user_group import UserGroup, UserGroupColor"
    ]
   },
   {
@@ -257,6 +258,126 @@
     "organization.remove_user(user)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "5d62aa21",
+   "metadata": {},
+   "source": [
+    "## Manage user groups\n",
+    "### Create user groups"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "69a5a82e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define a user group\n",
+    "user_group = UserGroup(\n",
+    "    client=client,\n",
+    "    name=\"New User Group\",\n",
+    "    color=UserGroupColor.BLUE,\n",
+    "    users={user, user1, user2},\n",
+    "    projects={project},\n",
+    ")\n",
+    "\n",
+    "# Create the defined user group\n",
+    "created_group = user_group.create()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b9edbdfd",
+   "metadata": {},
+   "source": [
+    "### Update user groups"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "062b8006",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the user group properties to be updated\n",
+    "user_group.name = \"Updated User Group Name\"\n",
+    "user_group.color = UserGroupColor.GREEN\n",
+    "\n",
+    "# Add new projects to the group\n",
+    "projects = set(user_group.projects)\n",
+    "projects.update([project_1, project_2])\n",
+    "user_group.projects = projects\n",
+    "\n",
+    "# Add new users to the group\n",
+    "users = set(user_group.users)\n",
+    "users.update([new_user_1, new_user_2])\n",
+    "user_group.users = users\n",
+    "\n",
+    "# Push the changes to the group\n",
+    "user_group.update()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce016ffb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Remove all members and projects from the group\n",
+    "user_group.users = set()\n",
+    "user_group.projects = set()\n",
+    "\n",
+    "# Push the changes to the group\n",
+    "user_group.update()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7560a5cb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Delete a user group\n",
+    "user_group.delete()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "047a83dc",
+   "metadata": {},
+   "source": [
+    "### Get user group info"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d3e18466",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get info of a user group\n",
+    "user_group.get()\n",
+    "\n",
+    "# Get all user groups in your workspace\n",
+    "user_groups = UserGroup(client).get_user_groups()\n",
+    "\n",
+    "# Search for a user group by its name\n",
+    "example_group = next((group for group in user_groups if group.name == \"example_name\"), None)\n",
+    "if example_group:\n",
+    "    print(f\"Found user group 'example_name' with ID: {example_group.id}\")\n",
+    "else:\n",
+    "    print(\"No user group named 'example_name' found\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
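
For quick reference, the user-group snippets in this patch can be strung together into one small script. This is a minimal sketch, not part of either commit: it relies only on calls that already appear above (lb.Client, client.get_organization(), organization.users(), client.create_project(), and the UserGroup/UserGroupColor surface), and the email address, group name, and project name are placeholder values.

import labelbox as lb
from labelbox.schema.user_group import UserGroup, UserGroupColor

client = lb.Client(api_key="")
organization = client.get_organization()

# Look up an existing member by email (placeholder address)
member = next(
    (u for u in organization.users() if u.email == "user@example.com"), None)

# Create a project to attach to the group
project = client.create_project(name="user_group_demo",
                                media_type=lb.MediaType.Image)

# Create the group; users and projects are sets
group = UserGroup(
    client=client,
    name="Demo User Group",
    color=UserGroupColor.BLUE,
    users={member} if member else set(),
    projects={project},
)
group.create()

# Rename the group and push the change
group.name = "Demo User Group (renamed)"
group.update()

# Clean up
group.delete()
project.delete()
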
From 54d93f1742b0c8622fb7e491e92773e75772048c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 14 Feb 2025 06:36:29 +0000 Subject: [PATCH 3/3] :art: Cleaned --- annotation_import/audio.ipynb | 356 ++------ annotation_import/pdf.ipynb | 1029 +++------------------- basics/data_rows.ipynb | 471 +++------- basics/user_management.ipynb | 238 +++--- exports/export_data.ipynb | 754 +++------------- prediction_upload/pdf_predictions.ipynb | 1045 +++-------------------- requirements-dev.lock | 54 +- 7 files changed, 740 insertions(+), 3207 deletions(-) diff --git a/annotation_import/audio.ipynb b/annotation_import/audio.ipynb index 10383eb..ea01d70 100644 --- a/annotation_import/audio.ipynb +++ b/annotation_import/audio.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Audio Annotation Import\n", @@ -51,188 +53,111 @@ "* Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", "* Label Import - used to provide ground truth labels. These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* For information on what types of annotations are supported per data type, refer to this documentation:\n", " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* Notes:\n", " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport uuid\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import uuid\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Replace with your API key\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported annotations for Audio" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "##### Classification free text #####\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"text_audio\",\n value=lb_types.Text(answer=\"free text audio annotation\"),\n)\n\ntext_annotation_ndjson = {\n \"name\": \"text_audio\",\n \"answer\": \"free text audio annotation\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "##### Classification free text #####\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"text_audio\",\n", - " value=lb_types.Text(answer=\"free text audio annotation\"),\n", - ")\n", - "\n", - "text_annotation_ndjson = {\n", - " \"name\": \"text_audio\",\n", - " \"answer\": \"free text audio annotation\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "##### Checklist Classification #######\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_audio\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_audio\",\n \"answers\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "##### Checklist Classification #######\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_audio\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"checklist_audio\",\n", - " \"answers\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\"\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + 
"source": "######## Radio Classification ######\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_audio\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\n\nradio_annotation_ndjson = {\n \"name\": \"radio_audio\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######## Radio Classification ######\n", - "\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_audio\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"second_radio_answer\")),\n", - ")\n", - "\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"radio_audio\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\"\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create one Labelbox dataset\n\nglobal_key = \"sample-audio-1.mp3\" + str(uuid.uuid4())\n\nasset = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"audio_annotation_import_demo_dataset\")\ntask = dataset.create_data_rows([asset])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows: \", task.failed_data_rows)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create one Labelbox dataset\n", - "\n", - "global_key = \"sample-audio-1.mp3\" + str(uuid.uuid4())\n", - "\n", - "asset = {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/audio-sample-data/sample-audio-1.mp3\",\n", - " \"global_key\":\n", - " global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"audio_annotation_import_demo_dataset\")\n", - "task = dataset.create_data_rows([asset])\n", - "task.wait_till_done()\n", - "print(\"Errors:\", task.errors)\n", - "print(\"Failed data rows: \", task.failed_data_rows)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an ontology\n", @@ -240,232 +165,135 @@ "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", "For example, when we create the text annotation, we provided the `name` as `text_audio`. Now, when we setup our ontology, we must ensure that the name of the tool is also `text_audio`. The same alignment must hold true for the other tools and classifications we create in our ontology." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "ontology_builder = lb.OntologyBuilder(classifications=[\n", - " lb.Classification(class_type=lb.Classification.Type.TEXT,\n", - " name=\"text_audio\"),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_audio\",\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_audio\",\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - "])\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Ontology Audio Annotations\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Audio,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", "## Step 3: Create a labeling project\n", "Connect the ontology to the labeling project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create Labelbox project\nproject = client.create_project(name=\"audio_project\",\n media_type=lb.MediaType.Audio)\n\n# Setup your ontology\nproject.connect_ontology(\n ontology) # Connect your ontology and editor to your project", + "cell_type": "code", "outputs": [], - "source": [ - "# Create Labelbox project\n", - "project = client.create_project(name=\"audio_project\",\n", - " media_type=lb.MediaType.Audio)\n", - "\n", - "# Setup your ontology\n", - "project.connect_ontology(\n", - " ontology) # Connect your ontology and editor to your project" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send a batch of data rows to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Setup Batches and Ontology\n\n# Create a batch to send to your MAL project\nbatch = project.create_batch(\n \"first-batch-audio-demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)\n\nprint(\"Batch: \", batch)", + "cell_type": "code", "outputs": [], - "source": [ - "# Setup Batches and Ontology\n", - "\n", - "# Create a batch to send to your MAL project\n", - "batch = project.create_batch(\n", - " \"first-batch-audio-demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated 
collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "print(\"Batch: \", batch)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the annotations payload\n", "Create the annotations payload using the snippets of code above\n", "\n", "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label = []\n", - "label.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[text_annotation, checklist_annotation, radio_annotation],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### NDJSON annotations \n", "Here we create the complete label NDJSON payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created [above](https://colab.research.google.com/drive/1rFv-VvHUBbzFYamz6nSMRJz1mEg6Ukqq#scrollTo=3umnTd-MfI0o&line=1&uniqifier=1)." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\nfor annotations in [\n text_annotation_ndjson,\n checklist_annotation_ndjson,\n radio_annotation_ndjson,\n]:\n annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n label_ndjson.append(annotations)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "for annotations in [\n", - " text_annotation_ndjson,\n", - " checklist_annotation_ndjson,\n", - " radio_annotation_ndjson,\n", - "]:\n", - " annotations.update({\"dataRow\": {\"globalKey\": global_key}})\n", - " label_ndjson.append(annotations)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Upload annotations to a project as pre-labels or complete labels" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Model Assisted Labeling (MAL)\n", "For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload our label using Model-Assisted Labeling\n", - "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=f\"mal_job-{str(uuid.uuid4())}\",\n", - " predictions=label,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Label Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload label for this data row in project\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload label for this data row in project\n", - "upload_job = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"label_import_job\" + str(uuid.uuid4()),\n", - " labels=label,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Optional deletions for cleanup " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/annotation_import/pdf.ipynb b/annotation_import/pdf.ipynb index 59964d1..b3d3390 100644 --- a/annotation_import/pdf.ipynb +++ b/annotation_import/pdf.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 1, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,17 +24,17 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Annotation Import" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -54,538 +56,137 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": 
"code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import uuid\nimport json\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "import json\n", - "import requests\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Replace with your API key\n", "Guides on https://docs.labelbox.com/docs/create-an-api-key" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Supported Annotations" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Entity ##########\n\n# Annotation Types\nentities_annotations = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_annotations_ndjson = {\n \"name\":\n \"named_entity\",\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n \"groupId\": \"\",\n \"page\": 1,\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Entity ##########\n", - "\n", - "# Annotation Types\n", - "entities_annotations = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "# NDJSON\n", - "entities_annotations_ndjson = {\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\",],\n", - " \"groupId\": \"\",\n", - " \"page\": 1,\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########### Radio Classification #########\n\n# Annotation types\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n# NDJSON\nradio_annotation_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\"\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########### Radio Classification #########\n", - "\n", - "# Annotation types\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - ")\n", - "# NDJSON\n", - "radio_annotation_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\"\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": 
{}, + "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\n# NDJSON\nchecklist_annotation_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\"\n },\n {\n \"name\": \"second_checklist_answer\"\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Checklist Classification ###########\n", - "\n", - "# Annotation types\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"checklist_question\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\"\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\"\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ Bounding Box ###########\n\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n end=lb_types.Point(x=518.571,\n y=245.143), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_annotation_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": {\n \"top\": 135.3,\n \"left\": 102.771,\n \"height\": 109.843,\n \"width\": 415.8\n },\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Bounding Box ###########\n", - "\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\", # must match your ontology feature\"s name\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=102.771, y=135.3), # x = left, y = top\n", - " end=lb_types.Point(x=518.571,\n", - " y=245.143), # x= left + width , y = top + height\n", - " page=0,\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " ),\n", - ")\n", - "\n", - "bbox_annotation_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"bbox\": {\n", - " \"top\": 135.3,\n", - " \"left\": 102.771,\n", - " \"height\": 109.843,\n", - " \"width\": 415.8\n", - " },\n", - " \"page\": 0,\n", - " \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# ############ global nested classifications ###########\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n 
\"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_radio_annotation_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# ############ global nested classifications ###########\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_annotation_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\":\n", - " \"first_checklist_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " },\n", - " }],\n", - " }],\n", - "}\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_annotation_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############## Classification Free-form text ##############\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\"),\n)\n\ntext_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}", + "cell_type": "code", "outputs": [], - "source": [ - "############## Classification Free-form text ##############\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature\"s name\n", - " value=lb_types.Text(answer=\"sample text\"),\n", - ")\n", - "\n", - "text_annotation_ndjson = {\"name\": \"free_text\", \"answer\": \"sample text\"}" - ] + 
"execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### BBOX with nested classifications #########\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n end=lb_types.Point(x=566.657,\n y=420.986), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_annotation_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\"\n },\n }],\n },\n }],\n \"bbox\": {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### BBOX with nested classifications #########\n", - "\n", - "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=317.271, y=226.757), # x = left, y = top\n", - " end=lb_types.Point(x=566.657,\n", - " y=420.986), # x= left + width , y = top + height\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"second_sub_radio_question\",\n", - " value=lb_types.Radio(\n", - " answer=lb_types.ClassificationAnswer(\n", - " name=\"second_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "bbox_with_radio_subclass_annotation_ndjson = {\n", - " \"name\": \"bbox_with_radio_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_radio_answer\",\n", - " \"classifications\": [{\n", - " \"name\": \"second_sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"second_sub_radio_answer\"\n", - " },\n", - " }],\n", - " },\n", - " }],\n", - " \"bbox\": {\n", - " \"top\": 226.757,\n", - " \"left\": 317.271,\n", - " \"height\": 194.229,\n", - " \"width\": 249.386,\n", - " },\n", - " \"page\": 1,\n", - " \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ NER with nested classifications ########\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n text_selections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n 
classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation_ndjson = {\n \"name\":\n \"ner_with_checklist_subclass\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": [{\n \"name\": \"first_sub_checklist_answer\"\n }],\n }],\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ NER with nested classifications ########\n", - "\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"ner_with_checklist_subclass\",\n", - " text_selections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "ner_with_checklist_subclass_annotation_ndjson = {\n", - " \"name\":\n", - " \"ner_with_checklist_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\": \"first_sub_checklist_answer\"\n", - " }],\n", - " }],\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\"],\n", - " \"groupId\": \"\",\n", - " \"page\": 1\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### Relationships ##########\nentity_source = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_target = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\nentity_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=entity_source,\n target=entity_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source = str(uuid.uuid4())\nuuid_target = str(uuid.uuid4())\n\nentity_source_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_source,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\n\nentity_target_ndjson = {\n \"name\":\n \"named_entity\",\n \"uuid\":\n uuid_target,\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n \"groupId\": \"\",\n \"page\": 1\n }],\n}\nner_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source,\n \"target\": uuid_target,\n \"type\": \"unidirectional\",\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### Relationships ##########\n", - "entity_source = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", 
- "entity_target = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "entity_relationship = lb_types.RelationshipAnnotation(\n", - " name=\"relationship\",\n", - " value=lb_types.Relationship(\n", - " source=entity_source,\n", - " target=entity_target,\n", - " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ),\n", - ")\n", - "\n", - "## Only supported for MAL imports\n", - "uuid_source = str(uuid.uuid4())\n", - "uuid_target = str(uuid.uuid4())\n", - "\n", - "entity_source_ndjson = {\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"uuid\":\n", - " uuid_source,\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\"],\n", - " \"groupId\": \"\",\n", - " \"page\": 1\n", - " }],\n", - "}\n", - "\n", - "entity_target_ndjson = {\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"uuid\":\n", - " uuid_target,\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\"],\n", - " \"groupId\": \"\",\n", - " \"page\": 1\n", - " }],\n", - "}\n", - "ner_relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\",\n", - " \"relationship\": {\n", - " \"source\": uuid_source,\n", - " \"target\": uuid_target,\n", - " \"type\": \"unidirectional\",\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### BBOX with relationships #############\n# Python Annotation\nbbox_source = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n end=lb_types.Point(x=270.907,\n y=149.556), # x = left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_target = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=96.424, y=66.251),\n end=lb_types.Point(x=179.074, y=146.932),\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n)\n\nbbox_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=bbox_source,\n target=bbox_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)\n\n## Only supported for MAL imports\nuuid_source_2 = str(uuid.uuid4())\nuuid_target_2 = str(uuid.uuid4())\n\nbbox_source_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": uuid_source_2,\n \"bbox\": {\n \"top\": 68.875,\n \"left\": 188.257,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_target_ndjson = {\n \"name\": \"bounding_box\",\n \"uuid\": uuid_target_2,\n \"bbox\": {\n \"top\": 66.251,\n \"left\": 96.424,\n \"height\": 80.681,\n \"width\": 82.65\n },\n \"page\": 1,\n \"unit\": \"POINTS\",\n}\n\nbbox_relationship_annotation_ndjson = {\n \"name\": \"relationship\",\n \"relationship\": {\n \"source\": uuid_source_2,\n \"target\": uuid_target_2,\n \"type\": \"unidirectional\",\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### BBOX with relationships #############\n", - "# Python Annotation\n", - "bbox_source = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=188.257, y=68.875), # x = left, y = top\n", - " end=lb_types.Point(x=270.907,\n", - " y=149.556), # x = left + width , y = top + height\n", - " 
unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - ")\n", - "\n", - "bbox_target = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=96.424, y=66.251),\n", - " end=lb_types.Point(x=179.074, y=146.932),\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - ")\n", - "\n", - "bbox_relationship = lb_types.RelationshipAnnotation(\n", - " name=\"relationship\",\n", - " value=lb_types.Relationship(\n", - " source=bbox_source,\n", - " target=bbox_target,\n", - " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ),\n", - ")\n", - "\n", - "## Only supported for MAL imports\n", - "uuid_source_2 = str(uuid.uuid4())\n", - "uuid_target_2 = str(uuid.uuid4())\n", - "\n", - "bbox_source_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"uuid\": uuid_source_2,\n", - " \"bbox\": {\n", - " \"top\": 68.875,\n", - " \"left\": 188.257,\n", - " \"height\": 80.681,\n", - " \"width\": 82.65\n", - " },\n", - " \"page\": 1,\n", - " \"unit\": \"POINTS\",\n", - "}\n", - "\n", - "bbox_target_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"uuid\": uuid_target_2,\n", - " \"bbox\": {\n", - " \"top\": 66.251,\n", - " \"left\": 96.424,\n", - " \"height\": 80.681,\n", - " \"width\": 82.65\n", - " },\n", - " \"page\": 1,\n", - " \"unit\": \"POINTS\",\n", - "}\n", - "\n", - "bbox_relationship_annotation_ndjson = {\n", - " \"name\": \"relationship\",\n", - " \"relationship\": {\n", - " \"source\": uuid_source_2,\n", - " \"target\": uuid_target_2,\n", - " \"type\": \"unidirectional\",\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Upload Annotations - putting it all together " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Import data rows into Catalog " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Passing a `text_layer_url` is not longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", @@ -599,206 +200,60 @@ "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. 
As a result, all tokens in the text layer are also rotated by 90 degrees.\n", "\n", "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "global_key = \"0801.3483_doc.pdf\" + str(uuid.uuid4())\nimg_url = {\n \"row_data\": {\n \"pdf_url\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n },\n \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "global_key = \"0801.3483_doc.pdf\" + str(uuid.uuid4())\n", - "img_url = {\n", - " \"row_data\": {\n", - " \"pdf_url\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", - " },\n", - " \"global_key\": global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", - "task = dataset.create_data_rows([img_url])\n", - "task.wait_till_done()\n", - "print(f\"Failed data rows: {task.failed_data_rows}\")\n", - "print(f\"Errors: {task.errors}\")\n", - "\n", - "if task.errors:\n", - " for error in task.errors:\n", - " if (\"Duplicate global key\" in error[\"message\"] and\n", - " dataset.row_count == 0):\n", - " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", - " print(f\"Deleting empty dataset: {dataset}\")\n", - " dataset.delete()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 2: Create/select an Ontology for your project\n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n 
scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "## Setup the ontology and link the tools created above.\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " tools=[ # List of Tool objects\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", - " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", - " 
lb.Tool(tool=lb.Tool.Type.RELATIONSHIP, name=\"relationship\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.NER,\n", - " name=\"ner_with_checklist_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_sub_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"second_sub_radio_question\",\n", - " options=[\n", - " lb.Option(\"second_sub_radio_answer\")\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Document Annotation Import Demo\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Document,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 3: Creating a labeling project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a Labelbox project\nproject = client.create_project(name=\"PDF_annotation_demo\",\n media_type=lb.MediaType.Document)\nproject.connect_ontology(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a Labelbox project\n", - "project = client.create_project(name=\"PDF_annotation_demo\",\n", - " media_type=lb.MediaType.Document)\n", - "project.connect_ontology(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 4: Send a batch of data rows to the project" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"PDF_annotation_batch\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 5. 
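The batch below is created from global keys, but, per its own comment, data row IDs are accepted as well. The following is a hedged alternative that resolves the global key first; the batch name is hypothetical and it assumes a `data_rows` parameter is available on `create_batch` in your SDK version:

# Sketch: create a batch from data row IDs instead of global keys
row_ids = client.get_data_row_ids_for_global_keys([global_key])["results"]
project.create_batch(
    "PDF_annotation_batch_by_id",  # batch names must be unique within a project
    data_rows=row_ids,
    priority=5,
)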
Create the annotation payload\n", @@ -807,356 +262,124 @@ "Labelbox support NDJSON only for this data type.\n", "\n", "The resulting label should have exactly the same content for annotations that are supported by both (with exception of the uuid strings that are generated)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "##### Step 5.1: First, we need to populate the text selections for Entity annotations\n", "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "To extract the generated text layer url we first need to export the data row" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "task = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_buffered_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = output.json\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)", + "cell_type": "code", "outputs": [], - "source": [ - "task = lb.DataRow.export(client=client, global_keys=[global_key])\n", - "task.wait_till_done()\n", - "stream = task.get_buffered_stream()\n", - "\n", - "text_layer = \"\"\n", - "for output in stream:\n", - " output_json = output.json\n", - " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n", - "print(text_layer)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n \"tokenIds\": list_tokens,\n \"page\": page\n }]\n })\n\n\n# Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n \"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. 
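Before hard-coding `content_phrases`, it can help to peek at what the generated text layer actually contains. This sketch assumes the same structure the parsing code below relies on (a list of page objects, each with `groups` carrying `content` and `tokens`):

import json
import requests

# Sketch: print the first few phrases per page so you can pick annotation targets
pages = json.loads(requests.get(text_layer).text)
for page_number, page in enumerate(pages, start=1):
    for group in page["groups"][:5]:
        print(page_number, repr(group["content"]), "tokens:", len(group["tokens"]))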
Kobayashi\",\n \"Organic charge transfer salts based on the donor\",\n \"the experimental investigations on this issue have not\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\ntext_selections_source = []\ntext_selections_target = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_annotations_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_annotation_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[2]:\n relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n text_selection_entity_source = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n text_selections_source.append(text_selection_entity_source)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entity_source_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_source, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[3]:\n relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n text_selection_entity_target = lb_types.DocumentTextSelection(\n group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n text_selections_target.append(text_selection_entity_target)\n # build text selections forthe NDJson annotations\n update_text_selections(\n annotation=entity_target_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n relationship_target, # ids representing individual words from the group\n page=1,\n )", + "cell_type": "code", "outputs": [], - "source": [ - "# Helper method\n", - "def update_text_selections(annotation, group_id, list_tokens, page):\n", - " return annotation.update({\n", - " \"textSelections\": [{\n", - " \"groupId\": group_id,\n", - " \"tokenIds\": list_tokens,\n", - " \"page\": page\n", - " }]\n", - " })\n", - "\n", - "\n", - "# Fetch the content of the text layer\n", - "res = requests.get(text_layer)\n", - "\n", - "# Phrases that we want to annotation obtained from the text layer url\n", - "content_phrases = [\n", - " \"Metal-insulator (MI) transitions have been one of the\",\n", - " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", - " \"Organic charge transfer salts based on the donor\",\n", - " \"the experimental investigations on this issue have not\",\n", - "]\n", - "\n", - "# Parse the text layer\n", - "text_selections = []\n", - "text_selections_ner = []\n", - "text_selections_source = []\n", - "text_selections_target = []\n", - "\n", - "for obj in json.loads(res.text):\n", - " for group in obj[\"groups\"]:\n", - " if group[\"content\"] == content_phrases[0]:\n", - " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " document_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", - " text_selections.append(document_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=entities_annotations_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[1]:\n", - " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " ner_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", - " text_selections_ner.append(ner_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=ner_with_checklist_subclass_annotation_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens_2, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[2]:\n", - " relationship_source = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " text_selection_entity_source = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=relationship_source, page=1)\n", - " text_selections_source.append(text_selection_entity_source)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=entity_source_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " relationship_source, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[3]:\n", - " relationship_target = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " text_selection_entity_target = lb_types.DocumentTextSelection(\n", - " group_id=group[\"id\"], tokenIds=relationship_target, page=1)\n", - " text_selections_target.append(text_selection_entity_target)\n", - " # build text selections forthe NDJson annotations\n", - " update_text_selections(\n", - " annotation=entity_target_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " relationship_target, # ids representing individual words from the group\n", - " page=1,\n", - " )" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Re-write the python annotations to include text selections (only required for python annotation types)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# re-write the entity 
annotation with text selections\nentities_annotation_document_entity = lb_types.DocumentEntity(\n name=\"named_entity\", textSelections=text_selections)\nentities_annotation = lb_types.ObjectAnnotation(\n name=\"named_entity\", value=entities_annotation_document_entity)\n\n# re-write the entity annotation + subclassification with text selections\nclassifications = [\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=ner_annotation_with_subclass,\n classifications=classifications,\n)\n\n# re-write the entity source and target annotations withe text selectios\nentity_source_doc = lb_types.DocumentEntity(\n name=\"named_entity\", text_selections=text_selections_source)\nentity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n value=entity_source_doc)\n\nentity_target_doc = lb_types.DocumentEntity(\n name=\"named_entity\", text_selections=text_selections_target)\nentity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n value=entity_target_doc)\n\n# re-write the entity relationship with the re-created entities\nentity_relationship = lb_types.RelationshipAnnotation(\n name=\"relationship\",\n value=lb_types.Relationship(\n source=entity_source,\n target=entity_target,\n type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n ),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# re-write the entity annotation with text selections\n", - "entities_annotation_document_entity = lb_types.DocumentEntity(\n", - " name=\"named_entity\", textSelections=text_selections)\n", - "entities_annotation = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\", value=entities_annotation_document_entity)\n", - "\n", - "# re-write the entity annotation + subclassification with text selections\n", - "classifications = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - "]\n", - "ner_annotation_with_subclass = lb_types.DocumentEntity(\n", - " name=\"ner_with_checklist_subclass\", textSelections=text_selections_ner)\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " value=ner_annotation_with_subclass,\n", - " classifications=classifications,\n", - ")\n", - "\n", - "# re-write the entity source and target annotations withe text selectios\n", - "entity_source_doc = lb_types.DocumentEntity(\n", - " name=\"named_entity\", text_selections=text_selections_source)\n", - "entity_source = lb_types.ObjectAnnotation(name=\"named_entity\",\n", - " value=entity_source_doc)\n", - "\n", - "entity_target_doc = lb_types.DocumentEntity(\n", - " name=\"named_entity\", text_selections=text_selections_target)\n", - "entity_target = lb_types.ObjectAnnotation(name=\"named_entity\",\n", - " value=entity_target_doc)\n", - "\n", - "# re-write the entity relationship with the re-created entities\n", - "entity_relationship = lb_types.RelationshipAnnotation(\n", - " name=\"relationship\",\n", - " value=lb_types.Relationship(\n", - " source=entity_source,\n", - " target=entity_target,\n", 
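If one of the phrases is not found in the text layer, its selection list stays empty and the rebuilt entities silently carry no tokens. A minimal guard like this sketch catches that before the label payload is assembled:

# Sketch: fail fast if any expected phrase was not matched in the text layer
selection_lists = {
    "entities": text_selections,
    "ner_subclass": text_selections_ner,
    "relationship_source": text_selections_source,
    "relationship_target": text_selections_target,
}
for name, selections in selection_lists.items():
    assert selections, f"No text selections built for '{name}', check content_phrases"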
- " type=lb_types.Relationship.Type.UNIDIRECTIONAL,\n", - " ),\n", - ")" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\nprint(f\"entities_annotation={entities_annotation}\")\nprint(\n f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\nprint(f\"entity_source_ndjson={entity_source_ndjson}\")\nprint(f\"entity_target_ndjson={entity_target_ndjson}\")\nprint(f\"entity_source={entity_source}\")\nprint(f\"entity_target={entity_target}\")", + "cell_type": "code", "outputs": [], - "source": [ - "# Final NDJSON and python annotations\n", - "print(f\"entities_annotations_ndjson={entities_annotations_ndjson}\")\n", - "print(f\"entities_annotation={entities_annotation}\")\n", - "print(\n", - " f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_annotation_ndjson}\"\n", - ")\n", - "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")\n", - "print(f\"entity_source_ndjson={entity_source_ndjson}\")\n", - "print(f\"entity_target_ndjson={entity_target_ndjson}\")\n", - "print(f\"entity_source={entity_source}\")\n", - "print(f\"entity_target={entity_target}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Python annotation\n", "Here we create the complete labels ndjson payload of annotations only using python annotation format. There is one annotation for each reference to an annotation that we created. Note that only a handful of python annotation types are supported for PDF documents." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "labels = []\n\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n radio_annotation,\n nested_radio_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n ner_with_checklist_subclass_annotation,\n entity_source,\n entity_target,\n entity_relationship, # Only supported for MAL imports\n bbox_source,\n bbox_target,\n bbox_relationship, # Only supported for MAL imports\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "labels = []\n", - "\n", - "labels.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " entities_annotation,\n", - " checklist_annotation,\n", - " nested_checklist_annotation,\n", - " text_annotation,\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " bbox_annotation,\n", - " bbox_with_radio_subclass_annotation,\n", - " ner_with_checklist_subclass_annotation,\n", - " entity_source,\n", - " entity_target,\n", - " entity_relationship, # Only supported for MAL imports\n", - " bbox_source,\n", - " bbox_target,\n", - " bbox_relationship, # Only supported for MAL imports\n", - " ],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### NDJson annotations\n", "Here we create the complete labels ndjson payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created above." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_ndjson = []\nfor annot in [\n entities_annotations_ndjson,\n checklist_annotation_ndjson,\n nested_checklist_annotation_ndjson,\n text_annotation_ndjson,\n radio_annotation_ndjson,\n nested_radio_annotation_ndjson,\n bbox_annotation_ndjson,\n bbox_with_radio_subclass_annotation_ndjson,\n ner_with_checklist_subclass_annotation_ndjson,\n entity_source_ndjson,\n entity_target_ndjson,\n ner_relationship_annotation_ndjson, # Only supported for MAL imports\n bbox_source_ndjson,\n bbox_target_ndjson,\n bbox_relationship_annotation_ndjson, # Only supported for MAL imports\n]:\n annot.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_ndjson.append(annot)", + "cell_type": "code", "outputs": [], - "source": [ - "label_ndjson = []\n", - "for annot in [\n", - " entities_annotations_ndjson,\n", - " checklist_annotation_ndjson,\n", - " nested_checklist_annotation_ndjson,\n", - " text_annotation_ndjson,\n", - " radio_annotation_ndjson,\n", - " nested_radio_annotation_ndjson,\n", - " bbox_annotation_ndjson,\n", - " bbox_with_radio_subclass_annotation_ndjson,\n", - " ner_with_checklist_subclass_annotation_ndjson,\n", - " entity_source_ndjson,\n", - " entity_target_ndjson,\n", - " ner_relationship_annotation_ndjson, # Only supported for MAL imports\n", - " bbox_source_ndjson,\n", - " bbox_target_ndjson,\n", - " bbox_relationship_annotation_ndjson, # Only supported for MAL imports\n", - "]:\n", - " annot.update({\n", - " \"dataRow\": {\n", - " \"globalKey\": global_key\n", - " },\n", - " })\n", - " label_ndjson.append(annot)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step 6: Import the annotation payload\n", "For the purpose of this tutorial only import one of the annotations payloads at the time (NDJSON or Python annotation types)." 
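Before importing, it can be useful to dump the NDJSON payload to disk and inspect it, one JSON object per line. A small sketch, assuming `json` is imported as in the text layer parsing step above; the filename is hypothetical:

# Sketch: write the NDJSON payload to a local file for inspection
with open("pdf_annotations.ndjson", "w") as f:
    for annot in label_ndjson:
        f.write(json.dumps(annot) + "\n")
print(f"Wrote {len(label_ndjson)} annotations to pdf_annotations.ndjson")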
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Option A: Upload to a labeling project as pre-labels (MAL)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n predictions=labels,\n)\n\nupload_job.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job = lb.MALPredictionImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"pdf_annotation_upload\" + str(uuid.uuid4()),\n", - " predictions=labels,\n", - ")\n", - "\n", - "upload_job.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job.errors)\n", - "print(\"Status of uploads: \", upload_job.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Option B: Upload to a labeling project using ground truth" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Uncomment this code when excluding relationships from label import\n## Relationships are not currently supported for label import\n\n# upload_job = lb.LabelImport.create_from_objects(\n# client = client,\n# project_id = project.uid,\n# name=\"label_import_job\"+str(uuid.uuid4()),\n# labels=labels) ## Remove unsupported relationships from the labels list\n\n# print(\"Errors:\", upload_job.errors)\n# print(\"Status of uploads: \", upload_job.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Uncomment this code when excluding relationships from label import\n", - "## Relationships are not currently supported for label import\n", - "\n", - "# upload_job = lb.LabelImport.create_from_objects(\n", - "# client = client,\n", - "# project_id = project.uid,\n", - "# name=\"label_import_job\"+str(uuid.uuid4()),\n", - "# labels=labels) ## Remove unsupported relationships from the labels list\n", - "\n", - "# print(\"Errors:\", upload_job.errors)\n", - "# print(\"Status of uploads: \", upload_job.statuses)" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + ] +} \ No newline at end of file diff --git a/basics/data_rows.ipynb b/basics/data_rows.ipynb index 4ffa472..cee3206 100644 --- a/basics/data_rows.ipynb +++ b/basics/data_rows.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,17 +24,17 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Data rows" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* Data rows are the assets that are being labeled. 
We currently support the following asset types:\n", @@ -47,520 +49,267 @@ " * Conversational\n", "* A data row cannot exist without belonging to a dataset.\n", "* Data rows are added to labeling tasks by first attaching them to datasets and then creating batches in projects" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install labelbox -q", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install labelbox -q" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport uuid\nimport json", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import uuid\n", - "import json" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# API Key and Client\n", "Provide a valid api key below in order to properly connect to the Labelbox Client." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your api key\nAPI_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Get data rows from projects" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Pick a project with batches that have data rows with global keys\nPROJECT_ID = \"\"\nproject = client.get_project(PROJECT_ID)\nbatches = list(project.batches())\nprint(batches)\n# This is the same as\n# -> dataset = client.get_dataset(dataset_id)", + "cell_type": "code", "outputs": [], - "source": [ - "# Pick a project with batches that have data rows with global keys\n", - "PROJECT_ID = \"\"\n", - "project = client.get_project(PROJECT_ID)\n", - "batches = list(project.batches())\n", - "print(batches)\n", - "# This is the same as\n", - "# -> dataset = client.get_dataset(dataset_id)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Fetch data rows from project's batches\n", "\n", "Batches will need to be exported from your project as a export parameter. Before you can export from a project you will need an ontology attached." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "batch_ids = [batch.uid for batch in batches]\n\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"performance_details\": True,\n \"batch_ids\":\n batch_ids, # Include batch ids if you only want to export specific batches, otherwise,\n # you can export all the data without using this parameter\n}\nfilters = {}\n\n# A task is returned, this provides additional information about the status of your task, such as\n# any errors encountered\nexport_task = project.export(params=export_params, filters=filters)\nexport_task.wait_till_done()", + "cell_type": "code", "outputs": [], - "source": [ - "batch_ids = [batch.uid for batch in batches]\n", - "\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"performance_details\": True,\n", - " \"batch_ids\":\n", - " batch_ids, # Include batch ids if you only want to export specific batches, otherwise,\n", - " # you can export all the data without using this parameter\n", - "}\n", - "filters = {}\n", - "\n", - "# A task is returned, this provides additional information about the status of your task, such as\n", - "# any errors encountered\n", - "export_task = project.export(params=export_params, filters=filters)\n", - "export_task.wait_till_done()" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "data_rows = []\n\n\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n data_row = output.json\n data_rows.append(data_row)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)", + "cell_type": "code", "outputs": [], - "source": [ - "data_rows = []\n", - "\n", - "\n", - "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", - " data_row = output.json\n", - " data_rows.append(data_row)\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error))\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT).start(\n", - " stream_handler=json_stream_handler)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Get single data row\ndata_row = data_rows[0]\nprint(data_row)", + "cell_type": "code", "outputs": [], - "source": [ - "# Get single data row\n", - "data_row = data_rows[0]\n", - "print(data_row)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Get labels from the data row" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "print(\"Associated label(s)\", data_row[\"projects\"][project.uid][\"labels\"])\nprint(\"Global key\", data_row[\"data_row\"][\"global_key\"])", + "cell_type": "code", "outputs": [], - "source": [ - "print(\"Associated label(s)\", data_row[\"projects\"][project.uid][\"labels\"])\n", - "print(\"Global 
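Each streamed record is a plain dict. The sketch below pulls out a few commonly used fields, assuming the export shape used elsewhere in this notebook (a `data_row` block plus per-project details keyed by project ID):

# Sketch: inspect a few fields of an exported record
record = data_rows[0]
print("id:", record["data_row"]["id"])
print("global key:", record["data_row"]["global_key"])
print("row data URL:", record["data_row"]["row_data"])
print("labels in this project:", len(record["projects"][project.uid]["labels"]))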
key\", data_row[\"data_row\"][\"global_key\"])" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Get data row ids by using global keys" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "global_key = \"\"\ntask = client.get_data_row_ids_for_global_keys([global_key])\nprint(f\"Data row id: {task['results']}\")", + "cell_type": "code", "outputs": [], - "source": [ - "global_key = \"\"\n", - "task = client.get_data_row_ids_for_global_keys([global_key])\n", - "print(f\"Data row id: {task['results']}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Create\n", "We recommend the following methods to create data rows : `dataset.upsert_data_rows()`, and `dataset.create_data_rows()`, " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Create data rows via `dataset.upsert_data_rows()`" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Create a dataset\ndataset = client.create_dataset(name=\"data_rows_demo_dataset_6\")\n# You can also upload metadata along with your data row\nmdo = client.get_data_row_metadata_ontology()", + "cell_type": "code", "outputs": [], - "source": [ - "# Create a dataset\n", - "dataset = client.create_dataset(name=\"data_rows_demo_dataset_6\")\n", - "# You can also upload metadata along with your data row\n", - "mdo = client.get_data_row_metadata_ontology()" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "uploads = []\n# Generate data rows\nfor i in range(1, 8):\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n \"TEST-ID-%id\" % uuid.uuid1(),\n ## add metadata (optional)\n \"metadata_fields\": [\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"tag\"].\n uid, # specify the schema id\n value=\"tag_string\", # typed inputs\n ),\n ],\n \"attachments\": [\n {\n \"type\":\n \"IMAGE_OVERLAY\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n },\n {\n \"type\": \"RAW_TEXT\",\n \"value\": \"IOWA, Zone 2232, June 2022 [Text string]\",\n },\n {\n \"type\":\n \"TEXT_URL\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n },\n {\n \"type\":\n \"IMAGE\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n },\n {\n \"type\":\n \"VIDEO\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\",\n },\n {\n \"type\":\n \"HTML\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\",\n },\n {\n \"type\":\n \"PDF_URL\",\n \"value\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n },\n ],\n })\n\ntask1 = dataset.upsert_data_rows(uploads)\ntask1.wait_till_done()\nprint(\"ERRORS: \", task1.errors)\nprint(\"RESULTS:\", task1.result)", + "cell_type": "code", "outputs": [], - "source": [ - "uploads = []\n", - "# Generate data rows\n", - "for i in range(1, 8):\n", - " uploads.append({\n", - " \"row_data\":\n", - " 
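The metadata fields in the upload below reference the reserved `tag` schema by name. To see which reserved schemas your workspace exposes before building payloads, a quick sketch using the same metadata ontology accessor:

# Sketch: list the reserved metadata schema names usable in DataRowMetadataField payloads
mdo = client.get_data_row_metadata_ontology()
for name in sorted(mdo.reserved_by_name):
    print(name)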
f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", - " \"global_key\":\n", - " \"TEST-ID-%id\" % uuid.uuid1(),\n", - " ## add metadata (optional)\n", - " \"metadata_fields\": [\n", - " lb.DataRowMetadataField(\n", - " schema_id=mdo.reserved_by_name[\"tag\"].\n", - " uid, # specify the schema id\n", - " value=\"tag_string\", # typed inputs\n", - " ),\n", - " ],\n", - " \"attachments\": [\n", - " {\n", - " \"type\":\n", - " \"IMAGE_OVERLAY\",\n", - " \"value\":\n", - " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n", - " },\n", - " {\n", - " \"type\": \"RAW_TEXT\",\n", - " \"value\": \"IOWA, Zone 2232, June 2022 [Text string]\",\n", - " },\n", - " {\n", - " \"type\":\n", - " \"TEXT_URL\",\n", - " \"value\":\n", - " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/text_attachment.txt\",\n", - " },\n", - " {\n", - " \"type\":\n", - " \"IMAGE\",\n", - " \"value\":\n", - " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/disease_attachment.jpeg\",\n", - " },\n", - " {\n", - " \"type\":\n", - " \"VIDEO\",\n", - " \"value\":\n", - " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/drone_video.mp4\",\n", - " },\n", - " {\n", - " \"type\":\n", - " \"HTML\",\n", - " \"value\":\n", - " \"https://storage.googleapis.com/labelbox-sample-datasets/Docs/windy.html\",\n", - " },\n", - " {\n", - " \"type\":\n", - " \"PDF_URL\",\n", - " \"value\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\",\n", - " },\n", - " ],\n", - " })\n", - "\n", - "task1 = dataset.upsert_data_rows(uploads)\n", - "task1.wait_till_done()\n", - "print(\"ERRORS: \", task1.errors)\n", - "print(\"RESULTS:\", task1.result)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Create data rows from data in your local path " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "from PIL import Image\n\n# Create dummy empty jpeg file\nwidth = 400\nheight = 300\ncolor = (255, 255, 255) # White color\nimage = Image.new(\"RGB\", (width, height), color)\n\n# Save the image as a JPEG file\nimage.save(\"dummy.jpg\")\n\nlocal_data_path = \"dummy.jpg\"\n\ndata = {\"row_data\": local_data_path, \"global_key\": str(uuid.uuid4())}\n\ntask3 = dataset.upsert_data_rows([data])\ntask3.wait_till_done()\nprint(\"ERRORS: \", task3.errors)\nprint(\"RESULTS:\", task3.result)", + "cell_type": "code", "outputs": [], - "source": [ - "from PIL import Image\n", - "\n", - "# Create dummy empty jpeg file\n", - "width = 400\n", - "height = 300\n", - "color = (255, 255, 255) # White color\n", - "image = Image.new(\"RGB\", (width, height), color)\n", - "\n", - "# Save the image as a JPEG file\n", - "image.save(\"dummy.jpg\")\n", - "\n", - "local_data_path = \"dummy.jpg\"\n", - "\n", - "data = {\"row_data\": local_data_path, \"global_key\": str(uuid.uuid4())}\n", - "\n", - "task3 = dataset.upsert_data_rows([data])\n", - "task3.wait_till_done()\n", - "print(\"ERRORS: \", task3.errors)\n", - "print(\"RESULTS:\", task3.result)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# You can mix local files with urls when creating data rows\ntask4 = dataset.upsert_data_rows([\n {\n \"row_data\":\n 
\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\",\n \"global_key\":\n str(uuid.uuid4()),\n },\n {\n \"row_data\": local_data_path,\n \"global_key\": str(uuid.uuid4())\n },\n])\ntask4.wait_till_done()\nprint(\"ERRORS: \", task4.errors)\nprint(\"RESULTS:\", task4.result)", + "cell_type": "code", "outputs": [], - "source": [ - "# You can mix local files with urls when creating data rows\n", - "task4 = dataset.upsert_data_rows([\n", - " {\n", - " \"row_data\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_0009.jpeg\",\n", - " \"global_key\":\n", - " str(uuid.uuid4()),\n", - " },\n", - " {\n", - " \"row_data\": local_data_path,\n", - " \"global_key\": str(uuid.uuid4())\n", - " },\n", - "])\n", - "task4.wait_till_done()\n", - "print(\"ERRORS: \", task4.errors)\n", - "print(\"RESULTS:\", task4.result)" - ] - }, - { - "cell_type": "markdown", + "execution_count": null + }, + { "metadata": {}, "source": [ "### Create data rows via `dataset.create_data_rows()`\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "dataset_2 = client.create_dataset(name=\"data_rows_demo_dataset_3\")", + "cell_type": "code", "outputs": [], - "source": [ - "dataset_2 = client.create_dataset(name=\"data_rows_demo_dataset_3\")" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "uploads = []\n# Generate data rows\nfor i in range(1, 9):\n uploads.append({\n \"row_data\":\n f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n \"global_key\":\n \"TEST-ID-%id\" % uuid.uuid1(),\n ## add metadata (optional)\n \"metadata_fields\": [\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"tag\"].\n uid, # specify the schema id\n value=\"tag_string\", # typed inputs\n ),\n ],\n })\n\ntask1_2 = dataset_2.create_data_rows(uploads)\ntask1_2.wait_till_done()\nprint(\"ERRORS: \", task1_2.errors)\nprint(\"RESULTS:\", task1_2.result)", + "cell_type": "code", "outputs": [], - "source": [ - "uploads = []\n", - "# Generate data rows\n", - "for i in range(1, 9):\n", - " uploads.append({\n", - " \"row_data\":\n", - " f\"https://storage.googleapis.com/labelbox-datasets/People_Clothing_Segmentation/jpeg_images/IMAGES/img_000{i}.jpeg\",\n", - " \"global_key\":\n", - " \"TEST-ID-%id\" % uuid.uuid1(),\n", - " ## add metadata (optional)\n", - " \"metadata_fields\": [\n", - " lb.DataRowMetadataField(\n", - " schema_id=mdo.reserved_by_name[\"tag\"].\n", - " uid, # specify the schema id\n", - " value=\"tag_string\", # typed inputs\n", - " ),\n", - " ],\n", - " })\n", - "\n", - "task1_2 = dataset_2.create_data_rows(uploads)\n", - "task1_2.wait_till_done()\n", - "print(\"ERRORS: \", task1_2.errors)\n", - "print(\"RESULTS:\", task1_2.result)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Update\n", "`dataset.upsert_data_rows()` can also be use to update data rows\n", "\n", "To update data rows using this method, you need to pass a `key`, which can reference either a global key or a data row ID. 
Additionally, include any fields that you wish to update along with their new values.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Fetch a data row from the first dataset example\nts = dataset.export()\nts.wait_till_done()\nDATA_ROW_ID = [output.json for output in ts.get_buffered_stream()\n ][0][\"data_row\"][\"id\"]\nGLOBAL_KEY = [output.json for output in ts.get_buffered_stream()\n ][0][\"data_row\"][\"global_key\"]\n\nprint(f\"Pick either a data row id : {DATA_ROW_ID} or global key: {GLOBAL_KEY}\")", + "cell_type": "code", "outputs": [], - "source": [ - "# Fetch a data row from the first dataset example\n", - "ts = dataset.export()\n", - "ts.wait_till_done()\n", - "DATA_ROW_ID = [output.json for output in ts.get_buffered_stream()\n", - " ][0][\"data_row\"][\"id\"]\n", - "GLOBAL_KEY = [output.json for output in ts.get_buffered_stream()\n", - " ][0][\"data_row\"][\"global_key\"]\n", - "\n", - "print(f\"Pick either a data row id : {DATA_ROW_ID} or global key: {GLOBAL_KEY}\")" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Update the global key assodicated with the DATAROW_ID or GLOBAL_KEY, and include a additional metadata\ndata = {\n \"key\":\n lb.UniqueId(DATA_ROW_ID),\n \"global_key\":\n \"NEW-ID-%id\" % uuid.uuid1(),\n \"metadata_fields\": [\n # New metadata\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"captureDateTime\"].uid,\n value=\"2000-01-01 00:00:00\",\n ),\n # Include original metadata otherwise it will be removed\n lb.DataRowMetadataField(\n schema_id=mdo.reserved_by_name[\"tag\"].uid,\n value=\"tag_string\",\n ),\n ],\n}\n\ntask5 = dataset_2.upsert_data_rows([data])\ntask5.wait_till_done()\nprint(\"ERRORS: \", task5.errors)\nprint(\"RESULTS:\", task5.result)", + "cell_type": "code", "outputs": [], - "source": [ - "# Update the global key assodicated with the DATAROW_ID or GLOBAL_KEY, and include a additional metadata\n", - "data = {\n", - " \"key\":\n", - " lb.UniqueId(DATA_ROW_ID),\n", - " \"global_key\":\n", - " \"NEW-ID-%id\" % uuid.uuid1(),\n", - " \"metadata_fields\": [\n", - " # New metadata\n", - " lb.DataRowMetadataField(\n", - " schema_id=mdo.reserved_by_name[\"captureDateTime\"].uid,\n", - " value=\"2000-01-01 00:00:00\",\n", - " ),\n", - " # Include original metadata otherwise it will be removed\n", - " lb.DataRowMetadataField(\n", - " schema_id=mdo.reserved_by_name[\"tag\"].uid,\n", - " value=\"tag_string\",\n", - " ),\n", - " ],\n", - "}\n", - "\n", - "task5 = dataset_2.upsert_data_rows([data])\n", - "task5.wait_till_done()\n", - "print(\"ERRORS: \", task5.errors)\n", - "print(\"RESULTS:\", task5.result)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Create a single attachment on an existing data row" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# You can only create one attachment at the time.\nDATA_ROW_ID = \"\"\ndata_row = client.get_data_row(DATA_ROW_ID)\nattachment = data_row.create_attachment(\n attachment_type=\"RAW_TEXT\", attachment_value=\"LABELERS WILL SEE THIS\")", + "cell_type": "code", "outputs": [], - "source": [ - "# You can only create one attachment at the time.\n", - "DATA_ROW_ID = \"\"\n", - "data_row = client.get_data_row(DATA_ROW_ID)\n", - "attachment = data_row.create_attachment(\n", - " attachment_type=\"RAW_TEXT\", attachment_value=\"LABELERS WILL SEE 
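The update below keys on `lb.UniqueId`; if you only have the global key, the same upsert can be keyed on it instead. This is a sketch under the assumption that a `GlobalKey` identifier class is exported by your SDK version, and it targets the dataset the row actually belongs to:

# Sketch: upsert keyed on the global key rather than the data row id (assumes lb.GlobalKey exists)
data = {
    "key": lb.GlobalKey(GLOBAL_KEY),
    "global_key": "NEW-ID-%id" % uuid.uuid1(),
}
task6 = dataset.upsert_data_rows([data])
task6.wait_till_done()
print("ERRORS: ", task6.errors)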
THIS\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Update a recently created attachment " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "attachment.update(type=\"RAW_TEXT\", value=\"NEW RAW TEXT\")", + "cell_type": "code", "outputs": [], - "source": [ - "attachment.update(type=\"RAW_TEXT\", value=\"NEW RAW TEXT\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Delete" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* Delete a single data row" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "DATAROW_ID_TO_DELETE = \"\"\ndata_row = client.get_data_row(DATAROW_ID_TO_DELETE)\ndata_row.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "DATAROW_ID_TO_DELETE = \"\"\n", - "data_row = client.get_data_row(DATAROW_ID_TO_DELETE)\n", - "data_row.delete()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* Bulk delete data row objects" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Bulk delete a list of data_rows ( limit: 4K data rows per call)\nlb.DataRow.bulk_delete(list(dataset.data_rows()))", + "cell_type": "code", "outputs": [], - "source": [ - "# Bulk delete a list of data_rows ( limit: 4K data rows per call)\n", - "lb.DataRow.bulk_delete(list(dataset.data_rows()))" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/basics/user_management.ipynb b/basics/user_management.ipynb index 84e40be..a190b99 100644 --- a/basics/user_management.ipynb +++ b/basics/user_management.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 5, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", " \n", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,10 +24,10 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# User Management\n", @@ -36,181 +38,180 @@ " * assign users to projects\n", " * set / update / revoke project role\n", " * delete users from org" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "%pip install \"labelbox[data]\"" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "import labelbox as lb\n", "import os\n", "from labelbox.schema.user_group import UserGroup, UserGroupColor" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# API Key and Client\n", "Provide a valid api key below in order to properly connect to the Labelbox Client." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "# Add your api key\n", "API_KEY = None\n", "client = lb.Client(api_key=API_KEY)\n", "organization = client.get_organization()" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Roles\n", "* When inviting a new user to an organization, there are various roles to select from.\n", "* All available roles to your org can be accessed via `client.get_roles()`" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "roles = client.get_roles()\n", "for name, role in roles.items():\n", " print(role.name, \":\", role.uid)" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* Above we printed out all of the roles available to the current org.\n", "* Notice the `NONE`. That is for project level roles" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Create\n", "* Users are created by sending an invite\n", "* An email will be sent to them and they will be asked to join your organization" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Organization Level Permissions\n", "* Invite a new labeler with labeling permissions on all projects" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "# First make sure that you have enough seats:\n", "organization.invite_limit()" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "USER_EMAIL = \"\"\n", "invite = organization.invite_user(USER_EMAIL, roles[\"LABELER\"])" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "print(invite.created_at)\n", "print(invite.organization_role_name)\n", "print(invite.email)" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Project Level Permissions\n", "* Invite a new labeler with labeling permissions specific to a set of projects\n", "* Here we set organization level permissions to Roles.NONE to indicate that the user only has project level permissions" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "USER_EMAIL = \"\"\n", - "project = client.create_project(name=\"test_user_management\",\n", - " media_type=lb.MediaType.Image)\n", + "project = client.create_project(\n", + " name=\"test_user_management\", media_type=lb.MediaType.Image\n", + ")\n", "project_role = lb.ProjectRole(project=project, role=roles[\"REVIEWER\"])\n", - "invite = organization.invite_user(USER_EMAIL,\n", - " roles[\"NONE\"],\n", - " project_roles=[project_role])" - ] + "invite = organization.invite_user(\n", + " USER_EMAIL, roles[\"NONE\"], project_roles=[project_role]\n", + ")" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Read\n", "* Outstanding invites cannot be queried for at this time. 
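Invites consume seats, so it can be worth checking whether the address already belongs to a member before sending one. A small sketch using the same `organization.users()` accessor used later in this notebook:

# Sketch: only send the invite if the email is not already a member of the org
existing = next((u for u in organization.users() if u.email == USER_EMAIL), None)
if existing is None:
    invite = organization.invite_user(USER_EMAIL, roles["LABELER"])
else:
    print(f"{USER_EMAIL} is already a member:", existing.org_role())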
This information can be found in the members tab of the web app.\n", "* You are able to query for members once they have joined." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "users = list(organization.users())\n", "print(users[0])" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Update\n", "* There is no update on invites. Instead you must delete and resend them\n", "* You can update User roles" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "# Get all users in the organization\n", "users = organization.users()\n", @@ -233,22 +234,21 @@ "# Make the user a labeler for the current project\n", "user.upsert_project_role(project, roles[\"LABELER\"])\n", "print(user.org_role())" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Delete\n", "You can remove users from projects and your organization using the SDK. Invites can only be deleted using the **Members** tab on the web platform at this moment." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, - "id": "568942a5", "metadata": {}, - "outputs": [], "source": [ "# Remove the user from a project\n", "user.remove_from_project(project)\n", @@ -256,23 +256,21 @@ "user.update_org_role(roles[\"NONE\"])\n", "# Remove the user from the org\n", "organization.remove_user(user)" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "markdown", - "id": "5d62aa21", "metadata": {}, "source": [ "## Manage user groups\n", "### Create user groups" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, - "id": "69a5a82e", "metadata": {}, - "outputs": [], "source": [ "# Define a user group\n", "user_group = UserGroup(\n", @@ -285,22 +283,20 @@ "\n", "# Create the defined user group\n", "created_group = user_group.create() " - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "markdown", - "id": "b9edbdfd", "metadata": {}, "source": [ "### Update user groups" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, - "id": "062b8006", "metadata": {}, - "outputs": [], "source": [ "# Define the user group properties to be updated\n", "user_group.name = \"Updated User Group Name\"\n", @@ -320,14 +316,13 @@ "\n", "# Push the changes to the group\n", "user_group.update()" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, - "id": "ce016ffb", "metadata": {}, - "outputs": [], "source": [ "## Remove all members and projects from the group\n", "user_group.users = []\n", @@ -336,33 +331,30 @@ "\n", "# Push the changes to the group\n", "user_group.update()" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, - "id": "7560a5cb", "metadata": {}, - "outputs": [], "source": [ "# Delete a user group\n", "user_group.delete()" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "markdown", - "id": "047a83dc", "metadata": {}, "source": [ "## Get user group info" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": 
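Beyond replacing a group's `users` and `projects` wholesale, it is often useful to read back who is in an existing group. The sketch below reuses the lookup pattern from this section; it assumes fetched groups expose their members as user objects with an `email` attribute and their projects with a `name` attribute:

# Sketch: inspect membership of an existing group found by name
groups = UserGroup(client).get_user_groups()
target_group = next((g for g in groups if g.name == "Updated User Group Name"), None)
if target_group:
    print("Members:", [u.email for u in target_group.users])
    print("Projects:", [p.name for p in target_group.projects])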
null, - "id": "d3e18466", "metadata": {}, - "outputs": [], "source": [ "# Get info of a user group\n", "user_group.get()\n", @@ -371,36 +363,34 @@ "user_groups = UserGroup(client).get_user_groups()\n", "\n", "# Search for a user group by its name\n", - "example_group = next((group for group in user_groups if group.name == \"example_name\"), None)\n", + "example_group = next(\n", + " (group for group in user_groups if group.name == \"example_name\"), None\n", + ")\n", "if example_group:\n", " print(f\"Found user group 'example_name' with ID: {example_group.id}\")\n", "else:\n", " print(\"No user group named 'example_name' found\")" - ] + ], + "cell_type": "code", + "outputs": [], + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Cleanup\n", "Delete the project if you no longer need it:" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, - "outputs": [], "source": [ "project.delete()" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" + ], + "cell_type": "code", + "outputs": [], + "execution_count": null } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + ] +} \ No newline at end of file diff --git a/exports/export_data.ipynb b/exports/export_data.ipynb index 3282e89..2196011 100644 --- a/exports/export_data.ipynb +++ b/exports/export_data.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,60 +24,49 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# Export data\n", "How to export data for projects, datasets, slices, data rows and models, with examples for each type of v2 export along with details on optional parameters and filters.\n", "\n", "***Beginning with SDK version 3.68, the `export_v2()` method has been enhanced to incorporate streamable functionality.***" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"\n%pip install -q urllib3", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"\n", - "%pip install -q urllib3" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\nimport urllib.request\nfrom PIL import Image\nimport time", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "import urllib.request\n", - "from PIL import Image\n", - "import time" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# API Key and Client\n", "See the developer guide for [creating an API key](https://docs.labelbox.com/reference/create-api-key)." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Export data rows from a project\n", @@ -105,7 +96,7 @@ "You can set the range for `last_activity_at` and `label_created_at` in the following formats: \n", "- `YYYY-MM-DD`\n", "- `YYYY-MM-DD hh:mm:ss`\n", - "- `YYYY-MM-DDThh:mm:ss±hhmm` (ISO 8601)\n", + "- `YYYY-MM-DDThh:mm:ss\u00b1hhmm` (ISO 8601)\n", "\n", "The ISO 8061 format allows you to specify the timezone, while the other two formats assume timezone from the user's workspace settings.\n", "\n", @@ -121,164 +112,61 @@ "- `InReview`\n", "- `InRework`\n", "- `Done`\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Insert the project ID of the project from which you wish to export data rows.\nPROJECT_ID = \"\"\nproject = client.get_project(PROJECT_ID)", + "cell_type": "code", "outputs": [], - "source": [ - "# Insert the project ID of the project from which you wish to export data rows.\n", - "PROJECT_ID = \"\"\n", - "project = client.get_project(PROJECT_ID)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Export V2 Method\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n}\n\n# Note: Filters follow AND logic, so typically using one filter is sufficient.\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"],\n # \"batch_ids\": [\"\", \"\"],\n # \"workflow_status\": \"\"\n}\n\nexport_task = project.export_v2(params=export_params, filters=filters)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", + "cell_type": "code", "outputs": [], - "source": [ - "# Set the export params to include/exclude certain fields.\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"embeddings\": True,\n", - "}\n", - "\n", - "# Note: Filters follow AND logic, so typically using one filter is sufficient.\n", - "filters = {\n", - " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " # \"global_keys\": [\"\", \"\"],\n", - " # \"data_row_ids\": [\"\", \"\"],\n", - " # \"batch_ids\": [\"\", \"\"],\n", - " # \"workflow_status\": \"\"\n", - "}\n", - "\n", - "export_task = project.export_v2(params=export_params, filters=filters)\n", - "export_task.wait_till_done()\n", - "\n", - "if 
export_task.errors:\n", - " print(export_task.errors)\n", - "\n", - "export_json = export_task.result\n", - "print(\"results: \", export_json)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n}\n\n# Note: Filters follow AND logic, so typically using one filter is sufficient.\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"],\n # \"batch_ids\": [\"\", \"\"],\n # \"workflow_status\": \"\"\n}\n\nexport_task = project.export(params=export_params, filters=filters)\nexport_task.wait_till_done()", + "cell_type": "code", "outputs": [], - "source": [ - "# Set the export params to include/exclude certain fields.\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"embeddings\": True,\n", - "}\n", - "\n", - "# Note: Filters follow AND logic, so typically using one filter is sufficient.\n", - "filters = {\n", - " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " # \"global_keys\": [\"\", \"\"],\n", - " # \"data_row_ids\": [\"\", \"\"],\n", - " # \"batch_ids\": [\"\", \"\"],\n", - " # \"workflow_status\": \"\"\n", - "}\n", - "\n", - "export_task = project.export(params=export_params, filters=filters)\n", - "export_task.wait_till_done()" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Provide results with JSON converter\n# Returns streamed JSON output strings from export task results/errors, one by one\n\n\n# Callback used for JSON Converter\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# Provide results with JSON converter\n", - "# Returns streamed JSON output strings from export task results/errors, one by one\n", - "\n", - "\n", - "# Callback used for JSON Converter\n", - "def json_stream_handler(output: 
lb.BufferedJsonConverterOutput):\n", - " print(output.json)\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error))\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT).start(\n", - " stream_handler=json_stream_handler)\n", - "\n", - "print(\n", - " \"file size: \",\n", - " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", - ")\n", - "print(\n", - " \"line count: \",\n", - " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", - ")" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Uncomment to get stream results as a written file\n\n# Provide results with file converter\n\n# if export_task.has_errors():\n# export_task.get_buffered_stream(\n# converter=lb.FileConverter(file_path=\"./errors.txt\"),\n# stream_type=lb.StreamType.ERRORS\n# ).start()\n\n# if export_task.has_result():\n# export_task.get_buffered_stream(\n# converter=lb.FileConverter(file_path=\"./result.txt\"),\n# stream_type=lb.StreamType.RESULT\n# ).start()", + "cell_type": "code", "outputs": [], - "source": [ - "# Uncomment to get stream results as a written file\n", - "\n", - "# Provide results with file converter\n", - "\n", - "# if export_task.has_errors():\n", - "# export_task.get_buffered_stream(\n", - "# converter=lb.FileConverter(file_path=\"./errors.txt\"),\n", - "# stream_type=lb.StreamType.ERRORS\n", - "# ).start()\n", - "\n", - "# if export_task.has_result():\n", - "# export_task.get_buffered_stream(\n", - "# converter=lb.FileConverter(file_path=\"./result.txt\"),\n", - "# stream_type=lb.StreamType.RESULT\n", - "# ).start()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Export data rows from a dataset\n", @@ -307,141 +195,54 @@ "- `global_keys`\n", "\n", "See the _Export data rows from a project_ section above for additional details on each filter. 
" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Insert the dataset ID of the dataset from which you wish to export data rows.\nDATASET_ID = \"\"\ndataset = client.get_dataset(DATASET_ID)", + "cell_type": "code", "outputs": [], - "source": [ - "# Insert the dataset ID of the dataset from which you wish to export data rows.\n", - "DATASET_ID = \"\"\n", - "dataset = client.get_dataset(DATASET_ID)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Export V2 Method" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\", \"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\n# Note: Filters follow AND logic, so typically using one filter is sufficient.\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"],\n}\n\nexport_task = dataset.export_v2(params=export_params, filters=filters)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", + "cell_type": "code", "outputs": [], - "source": [ - "# Set the export params to include/exclude certain fields.\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"embeddings\": True,\n", - " # \"project_ids\": [\"\", \"\"],\n", - " # \"model_run_ids\": [\"\", \"\"]\n", - "}\n", - "\n", - "# Note: Filters follow AND logic, so typically using one filter is sufficient.\n", - "filters = {\n", - " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " # \"global_keys\": [\"\", \"\"],\n", - " # \"data_row_ids\": [\"\", \"\"],\n", - "}\n", - "\n", - "export_task = dataset.export_v2(params=export_params, filters=filters)\n", - "export_task.wait_till_done()\n", - "\n", - "if export_task.errors:\n", - " print(export_task.errors)\n", - "\n", - "export_json = export_task.result\n", - "print(\"results: \", export_json)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\", \"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\n# Note: Filters follow AND logic, so typically using one filter is sufficient.\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"],\n}\n\nexport_task = dataset.export(params=export_params, filters=filters)\nexport_task.wait_till_done()", + "cell_type": "code", "outputs": [], - "source": [ - "# Set the export params to include/exclude certain fields.\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"embeddings\": True,\n", - " # \"project_ids\": [\"\", \"\"],\n", - " # \"model_run_ids\": [\"\", \"\"]\n", - "}\n", - "\n", - "# Note: Filters follow AND logic, so typically using one filter is sufficient.\n", - "filters = {\n", - " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " # \"global_keys\": [\"\", \"\"],\n", - " # \"data_row_ids\": [\"\", \"\"],\n", - "}\n", - "\n", - "export_task = dataset.export(params=export_params, filters=filters)\n", - "export_task.wait_till_done()" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Provide results with JSON converter\n# Returns streamed JSON output strings from export task results/errors, one by one\n\n\n# Callback used for JSON Converter\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# Provide results with JSON converter\n", - "# Returns streamed JSON output strings from export task results/errors, one by one\n", - "\n", - "\n", - "# Callback used for JSON Converter\n", - "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", - " print(output.json)\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error))\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT).start(\n", - " stream_handler=json_stream_handler)\n", - "\n", - "print(\n", - " 
\"file size: \",\n", - " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", - ")\n", - "print(\n", - " \"line count: \",\n", - " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Export data rows from Catalog\n", @@ -452,122 +253,47 @@ "\n", "### Filters\n", "When exporting from catalog, you can apply the same filters as exporting from a dataset.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "catalog = client.get_catalog()", + "cell_type": "code", "outputs": [], - "source": [ - "catalog = client.get_catalog()" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "export_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\",\"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"]\n}\nexport_task = catalog.export_v2(params=export_params, filters=filters)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", + "cell_type": "code", "outputs": [], - "source": [ - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"embeddings\": True,\n", - " # \"project_ids\": [\"\",\"\"],\n", - " # \"model_run_ids\": [\"\", \"\"]\n", - "}\n", - "\n", - "filters = {\n", - " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " # \"global_keys\": [\"\", \"\"],\n", - " # \"data_row_ids\": [\"\", \"\"]\n", - "}\n", - "export_task = catalog.export_v2(params=export_params, filters=filters)\n", - "export_task.wait_till_done()\n", - "\n", - "if export_task.errors:\n", - " print(export_task.errors)\n", - "\n", - "export_json = export_task.result\n", - "print(\"results: \", export_json)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "export_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\",\"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\nfilters = {\n \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n # \"global_keys\": [\"\", \"\"],\n # \"data_row_ids\": [\"\", \"\"]\n}\n\nexport_task = catalog.export(params=export_params)\nexport_task.wait_till_done()", + "cell_type": "code", "outputs": [], - "source": [ - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"embeddings\": True,\n", - " # \"project_ids\": [\"\",\"\"],\n", - " # \"model_run_ids\": [\"\", \"\"]\n", - "}\n", - "\n", - "filters = {\n", - " \"last_activity_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " # \"label_created_at\": [\"2000-01-01 00:00:00\", \"2050-01-01 00:00:00\"],\n", - " # \"global_keys\": [\"\", \"\"],\n", - " # \"data_row_ids\": [\"\", \"\"]\n", - "}\n", - "\n", - "export_task = catalog.export(params=export_params)\n", - "export_task.wait_till_done()" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", - " print(output.json)\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error))\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT).start(\n", - " stream_handler=json_stream_handler)\n", - "\n", - "print(\n", - " \"file size: \",\n", - " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", - ")\n", - "print(\n", - " \"line count: \",\n", - " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Export data rows from a slice\n", @@ -578,125 +304,54 @@ "\n", "### Filters\n", "No filters are applicable to exports from a slice. All the data rows of the slice must be exported." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Insert the Catalog slice ID of the slice from which you wish to export data rows.\nCATALOG_SLICE_ID = \"\"\ncatalog_slice = client.get_catalog_slice(CATALOG_SLICE_ID)", + "cell_type": "code", "outputs": [], - "source": [ - "# Insert the Catalog slice ID of the slice from which you wish to export data rows.\n", - "CATALOG_SLICE_ID = \"\"\n", - "catalog_slice = client.get_catalog_slice(CATALOG_SLICE_ID)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Export V2 Method" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\", \"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\nexport_task = catalog_slice.export_v2(params=export_params)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", + "cell_type": "code", "outputs": [], - "source": [ - "# Set the export params to include/exclude certain fields.\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"embeddings\": True,\n", - " # \"project_ids\": [\"\", \"\"],\n", - " # \"model_run_ids\": [\"\", \"\"]\n", - "}\n", - "\n", - "export_task = catalog_slice.export_v2(params=export_params)\n", - "export_task.wait_till_done()\n", - "\n", - "if export_task.errors:\n", - " print(export_task.errors)\n", - "\n", - "export_json = export_task.result\n", - "print(\"results: \", export_json)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." 
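The stream handlers in this notebook print each exported row. If you would rather keep the rows in memory, the same callback pattern can append them to a list. A minimal sketch, reusing an `export_task` returned by any of the streaming `.export(...)` calls shown above:

```python
# Collect streamed export results into a list instead of printing them.
# Assumes `export_task` was returned by one of the streaming export calls above.
exported_rows = []


def collect_stream_handler(output: lb.BufferedJsonConverterOutput):
    # Each callback receives one exported data row.
    exported_rows.append(output.json)


if export_task.has_result():
    export_task.get_buffered_stream(
        stream_type=lb.StreamType.RESULT).start(
            stream_handler=collect_stream_handler)

print(f"Collected {len(exported_rows)} data rows")
```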
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n # \"project_ids\": [\"\", \"\"],\n # \"model_run_ids\": [\"\", \"\"]\n}\n\nexport_task = catalog_slice.export(params=export_params)\nexport_task.wait_till_done()", + "cell_type": "code", "outputs": [], - "source": [ - "# Set the export params to include/exclude certain fields.\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"embeddings\": True,\n", - " # \"project_ids\": [\"\", \"\"],\n", - " # \"model_run_ids\": [\"\", \"\"]\n", - "}\n", - "\n", - "export_task = catalog_slice.export(params=export_params)\n", - "export_task.wait_till_done()" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Provide results with JSON converter\n# Returns streamed JSON output strings from export task results/errors, one by one\n\n\n# Callback used for JSON Converter\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# Provide results with JSON converter\n", - "# Returns streamed JSON output strings from export task results/errors, one by one\n", - "\n", - "\n", - "# Callback used for JSON Converter\n", - "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", - " print(output.json)\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error))\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT).start(\n", - " stream_handler=json_stream_handler)\n", - "\n", - "print(\n", - " \"file size: \",\n", - " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", - ")\n", - "print(\n", - " \"line count: \",\n", - " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Export data rows from a model run\n", @@ -713,117 +368,54 @@ "\n", "### Filters\n", "No filters are applicable to exports from a model run. 
All the data rows of the model run must be exported.\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Insert the model run ID of the model run from which you wish to export data rows.\nMODEL_RUN_ID = \"\"\nmodel_run = client.get_model_run(MODEL_RUN_ID)", + "cell_type": "code", "outputs": [], - "source": [ - "# Insert the model run ID of the model run from which you wish to export data rows.\n", - "MODEL_RUN_ID = \"\"\n", - "model_run = client.get_model_run(MODEL_RUN_ID)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Export V2 Method" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"interpolated_frames\": True,\n \"predictions\": True,\n \"embeddings\": True,\n}\n\nexport_task = model_run.export_v2(params=export_params)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", + "cell_type": "code", "outputs": [], - "source": [ - "# Set the export params to include/exclude certain fields.\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"predictions\": True,\n", - " \"embeddings\": True,\n", - "}\n", - "\n", - "export_task = model_run.export_v2(params=export_params)\n", - "export_task.wait_till_done()\n", - "\n", - "if export_task.errors:\n", - " print(export_task.errors)\n", - "\n", - "export_json = export_task.result\n", - "print(\"results: \", export_json)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"interpolated_frames\": True,\n \"predictions\": True,\n \"embeddings\": True,\n}\n\nexport_task = model_run.export(params=export_params)\nexport_task.wait_till_done()", + "cell_type": "code", "outputs": [], - "source": [ - "# Set the export params to include/exclude certain fields.\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"predictions\": True,\n", - " \"embeddings\": True,\n", - "}\n", - "\n", - "export_task = model_run.export(params=export_params)\n", - "export_task.wait_till_done()" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Provide results with JSON converter\n# Returns streamed JSON output strings from export task results/errors, one by one\n\n\n# Callback used for JSON Converter\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# Provide results with JSON converter\n", - "# Returns streamed JSON output strings from export task results/errors, one by one\n", - "\n", - "\n", - "# Callback used for JSON Converter\n", - "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", - " print(output.json)\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error))\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT).start(\n", - " stream_handler=json_stream_handler)\n", - "\n", - "print(\n", - " \"file size: \",\n", - " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", - ")\n", - "print(\n", - " \"line count: \",\n", - " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Export Data Row\n", @@ -834,130 +426,52 @@ "\n", "### Filters\n", "No filters are applicable to export data rows. All the data rows specified in the export task are included." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Insert the global key of the data row you wish to export\nDATA_ROW_GLOBAL_KEY = \"\"", + "cell_type": "code", "outputs": [], - "source": [ - "# Insert the global key of the data row you wish to export\n", - "DATA_ROW_GLOBAL_KEY = \"\"" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Export V2 Method" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n}\n\n# Provide a list of data row global keys\nexport_task = lb.DataRow.export_v2(client=client,\n global_keys=[DATA_ROW_GLOBAL_KEY],\n params=export_params)\nexport_task.wait_till_done()\n\nif export_task.errors:\n print(export_task.errors)\n\nexport_json = export_task.result\nprint(\"results: \", export_json)", + "cell_type": "code", "outputs": [], - "source": [ - "# Set the export params to include/exclude certain fields.\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"embeddings\": True,\n", - "}\n", - "\n", - "# Provide a list of data row global keys\n", - "export_task = lb.DataRow.export_v2(client=client,\n", - " global_keys=[DATA_ROW_GLOBAL_KEY],\n", - " params=export_params)\n", - "export_task.wait_till_done()\n", - "\n", - "if export_task.errors:\n", - " print(export_task.errors)\n", - "\n", - "export_json = export_task.result\n", - "print(\"results: \", export_json)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### Stream Task Export Method\n", "The return type of this method is an ExportTask, instead of a Task. This is just a wrapper around Task, and most of its features are also present in ExportTask.\n", "This allows streaming of task results and errors." 
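The setup cell imports `urllib.request` and `PIL`, which you can use to spot-check an exported asset. A minimal sketch, assuming `export_json` came from the Export V2 method above, the data row is an image, and the v2 export layout (`data_row.row_data` holding the asset URL) applies:

```python
import io
import urllib.request

from PIL import Image

# Spot-check the first exported data row by downloading its asset.
# Assumes the v2 export result layout shown above and an image asset behind row_data.
row = export_json[0]
asset_url = row["data_row"]["row_data"]

with urllib.request.urlopen(asset_url) as response:
    image = Image.open(io.BytesIO(response.read()))

print(row["data_row"]["global_key"], image.size)
```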
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Set the export params to include/exclude certain fields.\nexport_params = {\n \"attachments\": True,\n \"metadata_fields\": True,\n \"data_row_details\": True,\n \"project_details\": True,\n \"label_details\": True,\n \"performance_details\": True,\n \"interpolated_frames\": True,\n \"embeddings\": True,\n}\n\n# Provide a list of data row global keys\nexport_task = lb.DataRow.export(client=client,\n global_keys=[DATA_ROW_GLOBAL_KEY],\n params=export_params)\nexport_task.wait_till_done()", + "cell_type": "code", "outputs": [], - "source": [ - "# Set the export params to include/exclude certain fields.\n", - "export_params = {\n", - " \"attachments\": True,\n", - " \"metadata_fields\": True,\n", - " \"data_row_details\": True,\n", - " \"project_details\": True,\n", - " \"label_details\": True,\n", - " \"performance_details\": True,\n", - " \"interpolated_frames\": True,\n", - " \"embeddings\": True,\n", - "}\n", - "\n", - "# Provide a list of data row global keys\n", - "export_task = lb.DataRow.export(client=client,\n", - " global_keys=[DATA_ROW_GLOBAL_KEY],\n", - " params=export_params)\n", - "export_task.wait_till_done()" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Provide results with JSON converter\n# Returns streamed JSON output strings from export task results/errors, one by one\n\n\n# Callback used for JSON Converter\ndef json_stream_handler(output: lb.BufferedJsonConverterOutput):\n print(output.json)\n\n\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nprint(\n \"file size: \",\n export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n)\nprint(\n \"line count: \",\n export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n)", + "cell_type": "code", "outputs": [], - "source": [ - "# Provide results with JSON converter\n", - "# Returns streamed JSON output strings from export task results/errors, one by one\n", - "\n", - "\n", - "# Callback used for JSON Converter\n", - "def json_stream_handler(output: lb.BufferedJsonConverterOutput):\n", - " print(output.json)\n", - "\n", - "\n", - "if export_task.has_errors():\n", - " export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n", - " stream_handler=lambda error: print(error))\n", - "\n", - "if export_task.has_result():\n", - " export_json = export_task.get_buffered_stream(\n", - " stream_type=lb.StreamType.RESULT).start(\n", - " stream_handler=json_stream_handler)\n", - "\n", - "print(\n", - " \"file size: \",\n", - " export_task.get_total_file_size(stream_type=lb.StreamType.RESULT),\n", - ")\n", - "print(\n", - " \"line count: \",\n", - " export_task.get_total_lines(stream_type=lb.StreamType.RESULT),\n", - ")" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/prediction_upload/pdf_predictions.ipynb b/prediction_upload/pdf_predictions.ipynb index 950c917..8ab20f3 100644 --- a/prediction_upload/pdf_predictions.ipynb +++ b/prediction_upload/pdf_predictions.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + 
"nbformat_minor": 2, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,17 +24,17 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "# PDF Prediction Import " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "*Annotation types*\n", @@ -51,419 +53,115 @@ "- Bounding box \n", "- Entities \n", "- Relationships (only supported for MAL imports)" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import uuid\nimport json\nimport requests\nimport labelbox as lb\nimport labelbox.types as lb_types", + "cell_type": "code", "outputs": [], - "source": [ - "import uuid\n", - "import json\n", - "import requests\n", - "import labelbox as lb\n", - "import labelbox.types as lb_types" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Replace with your API key" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "API_KEY = \"\"\nclient = lb.Client(API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "API_KEY = \"\"\n", - "client = lb.Client(API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Supported Predictions" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########## Entity ##########\n\n# Annotation Types\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n confidence=0.5,\n value=lb_types.DocumentEntity(\n name=\"named_entity\",\n textSelections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n)\n\n# NDJSON\nentities_prediction_ndjson = {\n \"name\":\n \"named_entity\",\n \"confidence\":\n 0.5,\n \"textSelections\": [{\n \"tokenIds\": [\"\",],\n \"groupId\": \"\",\n \"page\": 1,\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########## Entity ##########\n", - "\n", - "# Annotation Types\n", - "entities_prediction = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " confidence=0.5,\n", - " value=lb_types.DocumentEntity(\n", - " name=\"named_entity\",\n", - " textSelections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - ")\n", - "\n", - "# NDJSON\n", - "entities_prediction_ndjson = {\n", - " \"name\":\n", - " \"named_entity\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\",],\n", - " \"groupId\": \"\",\n", - " \"page\": 1,\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "########### Radio Classification #########\n\n# Annotation types\nradio_prediction = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n 
name=\"first_radio_answer\", confidence=0.5)),\n)\n# NDJSON\nradio_prediction_ndjson = {\n \"name\": \"radio_question\",\n \"answer\": {\n \"name\": \"first_radio_answer\",\n \"confidence\": 0.5\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "########### Radio Classification #########\n", - "\n", - "# Annotation types\n", - "radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\", confidence=0.5)),\n", - ")\n", - "# NDJSON\n", - "radio_prediction_ndjson = {\n", - " \"name\": \"radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ Checklist Classification ###########\n\n# Annotation types\nchecklist_prediction = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n confidence=0.5),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n confidence=0.5),\n ]),\n)\n\n# NDJSON\nchecklist_prediction_ndjson = {\n \"name\":\n \"checklist_question\",\n \"answer\": [\n {\n \"name\": \"first_checklist_answer\",\n \"confidence\": 0.5\n },\n {\n \"name\": \"second_checklist_answer\",\n \"confidence\": 0.5\n },\n ],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Checklist Classification ###########\n", - "\n", - "# Annotation types\n", - "checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\",\n", - " confidence=0.5),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\",\n", - " confidence=0.5),\n", - " ]),\n", - ")\n", - "\n", - "# NDJSON\n", - "checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"checklist_question\",\n", - " \"answer\": [\n", - " {\n", - " \"name\": \"first_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " {\n", - " \"name\": \"second_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " ],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ Bounding Box ###########\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_prediction = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim_1[\"left\"],\n y=bbox_dim_1[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n ), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nbbox_prediction_ndjson = {\n \"name\": \"bounding_box\",\n \"bbox\": bbox_dim_1,\n \"page\": 0,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ Bounding Box ###########\n", - "\n", - "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", - "bbox_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\", # must match your ontology feature\"s name\n", - " value=lb_types.DocumentRectangle(\n", - 
" start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", - " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", - " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " page=0,\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " ),\n", - ")\n", - "\n", - "bbox_prediction_ndjson = {\n", - " \"name\": \"bounding_box\",\n", - " \"bbox\": bbox_dim_1,\n", - " \"page\": 0,\n", - " \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# ############ global nested classifications ###########\n\nnested_checklist_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_checklist_prediction_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\":\n 0.5, # Confidence scores should be added to the answer\n },\n }],\n }],\n}\n\nnested_radio_prediction = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n confidence=0.5, # Confidence scores should be added to the answer\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=\n 0.5, # Confidence scores should be added to the answer\n )),\n )\n ],\n )),\n)\n\nnested_radio_prediction_ndjson = {\n \"name\": \"nested_radio_question\",\n \"answer\": {\n \"name\":\n \"first_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\",\n \"confidence\": 0.5\n },\n }],\n },\n}", + "cell_type": "code", "outputs": [], - "source": [ - "# ############ global nested classifications ###########\n", - "\n", - "nested_checklist_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",\n", - " confidence=\n", - " 0.5, # Confidence scores should be added to the answer\n", - " )\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_checklist_prediction_ndjson = {\n", - " \"name\":\n", - " \"nested_checklist_question\",\n", - " \"answer\": [{\n", - " 
\"name\":\n", - " \"first_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " \"confidence\":\n", - " 0.5, # Confidence scores should be added to the answer\n", - " },\n", - " }],\n", - " }],\n", - "}\n", - "\n", - "nested_radio_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " confidence=0.5, # Confidence scores should be added to the answer\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=\n", - " 0.5, # Confidence scores should be added to the answer\n", - " )),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "nested_radio_prediction_ndjson = {\n", - " \"name\": \"nested_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"first_sub_radio_answer\",\n", - " \"confidence\": 0.5\n", - " },\n", - " }],\n", - " },\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############## Classification Free-form text ##############\n\ntext_prediction = lb_types.ClassificationAnnotation(\n name=\"free_text\", # must match your ontology feature\"s name\n value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n)\n\ntext_prediction_ndjson = {\n \"name\": \"free_text\",\n \"answer\": \"sample text\",\n \"confidence\": 0.5,\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############## Classification Free-form text ##############\n", - "\n", - "text_prediction = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", # must match your ontology feature\"s name\n", - " value=lb_types.Text(answer=\"sample text\", confidence=0.5),\n", - ")\n", - "\n", - "text_prediction_ndjson = {\n", - " \"name\": \"free_text\",\n", - " \"answer\": \"sample text\",\n", - " \"confidence\": 0.5,\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "######### BBOX with nested classifications #########\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n}\n\nbbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n confidence=0.5,\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n confidence=0.5,\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n 
name=\"second_sub_radio_answer\",\n confidence=0.5,\n )),\n )\n ],\n )),\n )\n ],\n)\n\nbbox_with_radio_subclass_prediction_ndjson = {\n \"name\": \"bbox_with_radio_subclass\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\":\n \"first_sub_radio_answer\",\n \"confidence\":\n 0.5,\n \"classifications\": [{\n \"name\": \"second_sub_radio_question\",\n \"answer\": {\n \"name\": \"second_sub_radio_answer\",\n \"confidence\": 0.5,\n },\n }],\n },\n }],\n \"bbox\": bbox_dim,\n \"page\": 1,\n \"unit\": \"POINTS\",\n}", + "cell_type": "code", "outputs": [], - "source": [ - "######### BBOX with nested classifications #########\n", - "\n", - "bbox_dim = {\n", - " \"top\": 226.757,\n", - " \"left\": 317.271,\n", - " \"height\": 194.229,\n", - " \"width\": 249.386,\n", - "}\n", - "\n", - "bbox_with_radio_subclass_prediction = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " confidence=0.5,\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim[\"left\"],\n", - " y=bbox_dim[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n", - " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " confidence=0.5,\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"second_sub_radio_question\",\n", - " value=lb_types.Radio(\n", - " answer=lb_types.ClassificationAnswer(\n", - " name=\"second_sub_radio_answer\",\n", - " confidence=0.5,\n", - " )),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "bbox_with_radio_subclass_prediction_ndjson = {\n", - " \"name\": \"bbox_with_radio_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\":\n", - " \"first_sub_radio_answer\",\n", - " \"confidence\":\n", - " 0.5,\n", - " \"classifications\": [{\n", - " \"name\": \"second_sub_radio_question\",\n", - " \"answer\": {\n", - " \"name\": \"second_sub_radio_answer\",\n", - " \"confidence\": 0.5,\n", - " },\n", - " }],\n", - " },\n", - " }],\n", - " \"bbox\": bbox_dim,\n", - " \"page\": 1,\n", - " \"unit\": \"POINTS\",\n", - "}" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "############ NER with nested classifications ########\n\nner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n value=lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n text_selections=[\n lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n ],\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n confidence=0.5)\n ]),\n )\n ],\n)\n\nner_with_checklist_subclass_prediction_ndjson = {\n \"name\":\n \"ner_with_checklist_subclass\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": [{\n \"name\": \"first_sub_checklist_answer\",\n \"confidence\": 0.5\n }],\n }],\n \"textSelections\": [{\n \"tokenIds\": [\"\"],\n 
\"groupId\": \"\",\n \"page\": 1\n }],\n}", + "cell_type": "code", "outputs": [], - "source": [ - "############ NER with nested classifications ########\n", - "\n", - "ner_with_checklist_subclass_prediction = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " confidence=0.5,\n", - " value=lb_types.DocumentEntity(\n", - " name=\"ner_with_checklist_subclass\",\n", - " text_selections=[\n", - " lb_types.DocumentTextSelection(token_ids=[], group_id=\"\", page=1)\n", - " ],\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n", - " confidence=0.5)\n", - " ]),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "ner_with_checklist_subclass_prediction_ndjson = {\n", - " \"name\":\n", - " \"ner_with_checklist_subclass\",\n", - " \"classifications\": [{\n", - " \"name\": \"sub_checklist_question\",\n", - " \"answer\": [{\n", - " \"name\": \"first_sub_checklist_answer\",\n", - " \"confidence\": 0.5\n", - " }],\n", - " }],\n", - " \"textSelections\": [{\n", - " \"tokenIds\": [\"\"],\n", - " \"groupId\": \"\",\n", - " \"page\": 1\n", - " }],\n", - "}" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 1: Import data rows into Catalog " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Passing a `text_layer_url` is not longer required. Labelbox automatically generates a text layer using Google Document AI and its OCR engine to detect tokens. \n", @@ -477,200 +175,60 @@ "For example, in a landscape-oriented PDF, the document is rotated by 90 degrees before processing. 
As a result, all tokens in the text layer are also rotated by 90 degrees.\n", "\n", "You may still pass a `text_layer_url` if you wish to bypass the automatic text layer generation" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "global_key = \"0801.3483.pdf\" + str(uuid.uuid4())\nimg_url = {\n \"row_data\": {\n \"pdf_url\":\n \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n },\n \"global_key\": global_key,\n}\n\ndataset = client.create_dataset(name=\"pdf_demo_dataset\")\ntask = dataset.create_data_rows([img_url])\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "global_key = \"0801.3483.pdf\" + str(uuid.uuid4())\n", - "img_url = {\n", - " \"row_data\": {\n", - " \"pdf_url\":\n", - " \"https://storage.googleapis.com/labelbox-datasets/arxiv-pdf/data/99-word-token-pdfs/0801.3483.pdf\"\n", - " },\n", - " \"global_key\": global_key,\n", - "}\n", - "\n", - "dataset = client.create_dataset(name=\"pdf_demo_dataset\")\n", - "task = dataset.create_data_rows([img_url])\n", - "task.wait_till_done()\n", - "print(f\"Failed data rows: {task.failed_data_rows}\")\n", - "print(f\"Errors: {task.errors}\")\n", - "\n", - "if task.errors:\n", - " for error in task.errors:\n", - " if (\"Duplicate global key\" in error[\"message\"] and\n", - " dataset.row_count == 0):\n", - " # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n", - " print(f\"Deleting empty dataset: {dataset}\")\n", - " dataset.delete()" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 2: Create/select an Ontology for your model predictions\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "## Setup the ontology and link the tools created above.\n\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"free_text\",\n scope=lb.Classification.Scope.GLOBAL,\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"nested_checklist_question\",\n scope=lb.Classification.Scope.GLOBAL,\n options=[\n lb.Option(\n \"first_checklist_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(\"first_sub_checklist_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n lb.Tool(\n tool=lb.Tool.Type.NER,\n name=\"ner_with_checklist_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"sub_checklist_question\",\n options=[lb.Option(value=\"first_sub_checklist_answer\")],\n )\n ],\n ),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.Option(\n value=\"first_sub_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"second_sub_radio_question\",\n options=[\n lb.Option(\"second_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Document Annotation Import Demo\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Document,\n)", + "cell_type": "code", "outputs": [], - "source": [ - "## Setup the ontology and link the tools created above.\n", - "\n", - "ontology_builder = lb.OntologyBuilder(\n", - " classifications=[ # List of Classification objects\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_radio_answer\"),\n", - " lb.Option(value=\"second_radio_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(value=\"first_checklist_answer\"),\n", - " lb.Option(value=\"second_checklist_answer\"),\n", - " ],\n", - " ),\n", - " lb.Classification(\n", 
- " class_type=lb.Classification.Type.TEXT,\n", - " name=\"free_text\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"nested_radio_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[lb.Option(\"first_sub_radio_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"nested_checklist_question\",\n", - " scope=lb.Classification.Scope.GLOBAL,\n", - " options=[\n", - " lb.Option(\n", - " \"first_checklist_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - " tools=[ # List of Tool objects\n", - " lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n", - " lb.Tool(tool=lb.Tool.Type.NER, name=\"named_entity\"),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.NER,\n", - " name=\"ner_with_checklist_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.CHECKLIST,\n", - " name=\"sub_checklist_question\",\n", - " options=[lb.Option(value=\"first_sub_checklist_answer\")],\n", - " )\n", - " ],\n", - " ),\n", - " lb.Tool(\n", - " tool=lb.Tool.Type.BBOX,\n", - " name=\"bbox_with_radio_subclass\",\n", - " classifications=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"sub_radio_question\",\n", - " options=[\n", - " lb.Option(\n", - " value=\"first_sub_radio_answer\",\n", - " options=[\n", - " lb.Classification(\n", - " class_type=lb.Classification.Type.RADIO,\n", - " name=\"second_sub_radio_question\",\n", - " options=[\n", - " lb.Option(\"second_sub_radio_answer\")\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " )\n", - " ],\n", - " ),\n", - " ],\n", - ")\n", - "\n", - "ontology = client.create_ontology(\n", - " \"Document Annotation Import Demo\",\n", - " ontology_builder.asdict(),\n", - " media_type=lb.MediaType.Document,\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 3: Create a Model and Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# create Model\nmodel = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n ontology_id=ontology.uid)\n# create Model Run\nmodel_run = model.create_model_run(\"iteration 1\")", + "cell_type": "code", "outputs": [], - "source": [ - "# create Model\n", - "model = client.create_model(name=\"PDF_model_run_\" + str(uuid.uuid4()),\n", - " ontology_id=ontology.uid)\n", - "# create Model Run\n", - "model_run = model.create_model_run(\"iteration 1\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 4: Send data rows to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "model_run.upsert_data_rows(global_keys=[global_key])", + "cell_type": "code", "outputs": [], - "source": [ - "model_run.upsert_data_rows(global_keys=[global_key])" - ] + 
"execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 5: Create the predictions payload\n", @@ -679,507 +237,184 @@ "Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n", "\n", "The resulting payload should have exactly the same content for annotations that are supported by both" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "To import ner annotations, you must pass a `text_layer_url`, Labelbox automatically generates a `text_layer_url` after importing a pdf asset that doesn't include a `text_layer_url`" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "To extract the generated text layer url we first need to export the data row" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "task = lb.DataRow.export(client=client, global_keys=[global_key])\ntask.wait_till_done()\nstream = task.get_buffered_stream()\n\ntext_layer = \"\"\nfor output in stream:\n output_json = output.json\n text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\nprint(text_layer)", + "cell_type": "code", "outputs": [], - "source": [ - "task = lb.DataRow.export(client=client, global_keys=[global_key])\n", - "task.wait_till_done()\n", - "stream = task.get_buffered_stream()\n", - "\n", - "text_layer = \"\"\n", - "for output in stream:\n", - " output_json = output.json\n", - " text_layer = output_json[\"media_attributes\"][\"text_layer_url\"]\n", - "print(text_layer)" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Helper method\ndef update_text_selections(annotation, group_id, list_tokens, page):\n return annotation.update({\n \"textSelections\": [{\n \"groupId\": group_id,\n \"tokenIds\": list_tokens,\n \"page\": page\n }]\n })\n\n\n# Fetch the content of the text layer\nres = requests.get(text_layer)\n\n# Phrases that we want to annotation obtained from the text layer url\ncontent_phrases = [\n \"Metal-insulator (MI) transitions have been one of the\",\n \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n]\n\n# Parse the text layer\ntext_selections = []\ntext_selections_ner = []\n\nfor obj in json.loads(res.text):\n for group in obj[\"groups\"]:\n if group[\"content\"] == content_phrases[0]:\n list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n document_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n text_selections.append(document_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=entities_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens, # ids representing individual words from the group\n page=1,\n )\n if group[\"content\"] == content_phrases[1]:\n list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n # build text selections for Python Annotation Types\n ner_text_selection = lb_types.DocumentTextSelection(\n groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n text_selections_ner.append(ner_text_selection)\n # build text selection for the NDJson annotations\n update_text_selections(\n annotation=ner_with_checklist_subclass_prediction_ndjson,\n group_id=group[\"id\"], # id representing group of words\n list_tokens=\n list_tokens_2, # ids representing individual words from the group\n page=1,\n )\n\n# re-write the entity annotation with text selections\nentities_prediction_document_entity = lb_types.DocumentEntity(\n name=\"named_entity\", confidence=0.5, textSelections=text_selections)\nentities_prediction = lb_types.ObjectAnnotation(\n name=\"named_entity\", value=entities_prediction_document_entity)\n\n# re-write the entity annotation + subclassification with text selections\nclassifications = [\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n confidence=0.5)\n ]),\n )\n]\nner_annotation_with_subclass = lb_types.DocumentEntity(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n textSelections=text_selections_ner,\n)\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n confidence=0.5,\n value=ner_annotation_with_subclass,\n classifications=classifications,\n)\n\n# Final NDJSON and python annotations\nprint(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\nprint(f\"entities_annotation={entities_prediction}\")\nprint(\n f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n)\nprint(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")", + "cell_type": "code", "outputs": [], - "source": [ - "# Helper method\n", - "def update_text_selections(annotation, group_id, list_tokens, page):\n", - " return annotation.update({\n", - " \"textSelections\": [{\n", - " \"groupId\": group_id,\n", - " \"tokenIds\": list_tokens,\n", - " \"page\": page\n", - " }]\n", - " })\n", - "\n", - "\n", - "# Fetch the content of the text layer\n", - "res = requests.get(text_layer)\n", - "\n", - "# Phrases that we want to annotation obtained from the text layer url\n", - "content_phrases = [\n", - " \"Metal-insulator (MI) transitions have been one of the\",\n", - " \"T. Sasaki, N. Yoneyama, and N. 
Kobayashi\",\n", - "]\n", - "\n", - "# Parse the text layer\n", - "text_selections = []\n", - "text_selections_ner = []\n", - "\n", - "for obj in json.loads(res.text):\n", - " for group in obj[\"groups\"]:\n", - " if group[\"content\"] == content_phrases[0]:\n", - " list_tokens = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " document_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens, page=1)\n", - " text_selections.append(document_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=entities_prediction_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - " if group[\"content\"] == content_phrases[1]:\n", - " list_tokens_2 = [x[\"id\"] for x in group[\"tokens\"]]\n", - " # build text selections for Python Annotation Types\n", - " ner_text_selection = lb_types.DocumentTextSelection(\n", - " groupId=group[\"id\"], tokenIds=list_tokens_2, page=1)\n", - " text_selections_ner.append(ner_text_selection)\n", - " # build text selection for the NDJson annotations\n", - " update_text_selections(\n", - " annotation=ner_with_checklist_subclass_prediction_ndjson,\n", - " group_id=group[\"id\"], # id representing group of words\n", - " list_tokens=\n", - " list_tokens_2, # ids representing individual words from the group\n", - " page=1,\n", - " )\n", - "\n", - "# re-write the entity annotation with text selections\n", - "entities_prediction_document_entity = lb_types.DocumentEntity(\n", - " name=\"named_entity\", confidence=0.5, textSelections=text_selections)\n", - "entities_prediction = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\", value=entities_prediction_document_entity)\n", - "\n", - "# re-write the entity annotation + subclassification with text selections\n", - "classifications = [\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\",\n", - " confidence=0.5)\n", - " ]),\n", - " )\n", - "]\n", - "ner_annotation_with_subclass = lb_types.DocumentEntity(\n", - " name=\"ner_with_checklist_subclass\",\n", - " confidence=0.5,\n", - " textSelections=text_selections_ner,\n", - ")\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " confidence=0.5,\n", - " value=ner_annotation_with_subclass,\n", - " classifications=classifications,\n", - ")\n", - "\n", - "# Final NDJSON and python annotations\n", - "print(f\"entities_annotations_ndjson={entities_prediction_ndjson}\")\n", - "print(f\"entities_annotation={entities_prediction}\")\n", - "print(\n", - " f\"nested_entities_annotation_ndjson={ner_with_checklist_subclass_prediction_ndjson}\"\n", - ")\n", - "print(f\"nested_entities_annotation={ner_with_checklist_subclass_annotation}\")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "Python annotation \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_predictions = []\n\nlabel_predictions.append(\n lb_types.Label(\n data=lb_types.DocumentData(global_key=global_key),\n annotations=[\n entities_prediction,\n checklist_prediction,\n 
nested_checklist_prediction,\n text_prediction,\n radio_prediction,\n nested_radio_prediction,\n bbox_prediction,\n bbox_with_radio_subclass_prediction,\n ner_with_checklist_subclass_prediction,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "label_predictions = []\n", - "\n", - "label_predictions.append(\n", - " lb_types.Label(\n", - " data=lb_types.DocumentData(global_key=global_key),\n", - " annotations=[\n", - " entities_prediction,\n", - " checklist_prediction,\n", - " nested_checklist_prediction,\n", - " text_prediction,\n", - " radio_prediction,\n", - " nested_radio_prediction,\n", - " bbox_prediction,\n", - " bbox_with_radio_subclass_prediction,\n", - " ner_with_checklist_subclass_prediction,\n", - " ],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "If using NDJSON: " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "label_predictions_ndjson = []\nfor annot in [\n entities_prediction_ndjson,\n checklist_prediction_ndjson,\n nested_checklist_prediction_ndjson,\n text_prediction_ndjson,\n radio_prediction_ndjson,\n nested_radio_prediction_ndjson,\n bbox_prediction_ndjson,\n bbox_with_radio_subclass_prediction_ndjson,\n ner_with_checklist_subclass_prediction_ndjson,\n]:\n annot.update({\n \"dataRow\": {\n \"globalKey\": global_key\n },\n })\n label_predictions_ndjson.append(annot)", + "cell_type": "code", "outputs": [], - "source": [ - "label_predictions_ndjson = []\n", - "for annot in [\n", - " entities_prediction_ndjson,\n", - " checklist_prediction_ndjson,\n", - " nested_checklist_prediction_ndjson,\n", - " text_prediction_ndjson,\n", - " radio_prediction_ndjson,\n", - " nested_radio_prediction_ndjson,\n", - " bbox_prediction_ndjson,\n", - " bbox_with_radio_subclass_prediction_ndjson,\n", - " ner_with_checklist_subclass_prediction_ndjson,\n", - "]:\n", - " annot.update({\n", - " \"dataRow\": {\n", - " \"globalKey\": global_key\n", - " },\n", - " })\n", - " label_predictions_ndjson.append(annot)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 6: Upload the predictions payload to the Model Run" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Upload the prediction label to the Model Run\nupload_job_prediction = model_run.add_predictions(\n name=\"prediction_upload_job\" + str(uuid.uuid4()),\n predictions=label_predictions,\n)\n\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_prediction.errors)\nprint(\"Status of uploads: \", upload_job_prediction.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "# Upload the prediction label to the Model Run\n", - "upload_job_prediction = model_run.add_predictions(\n", - " name=\"prediction_upload_job\" + str(uuid.uuid4()),\n", - " predictions=label_predictions,\n", - ")\n", - "\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_prediction.errors)\n", - "print(\"Status of uploads: \", upload_job_prediction.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Step 7: Send annotations to the Model Run\n", "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.1 Create a labelbox project \n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project = client.create_project(name=\"Document Prediction Import Demo\",\n media_type=lb.MediaType.Document)\nproject.connect_ontology(ontology)", + "cell_type": "code", "outputs": [], - "source": [ - "project = client.create_project(name=\"Document Prediction Import Demo\",\n", - " media_type=lb.MediaType.Document)\n", - "project.connect_ontology(ontology)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.2 Create a batch to send to the project " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "project.create_batch(\n \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n global_keys=[\n global_key\n ], # Paginated collection of data row objects, list of data row ids or global keys\n priority=5, # priority between 1(Highest) - 5(lowest)\n)", + "cell_type": "code", "outputs": [], - "source": [ - "project.create_batch(\n", - " \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n", - " global_keys=[\n", - " global_key\n", - " ], # Paginated collection of data row objects, list of data row ids or global keys\n", - " priority=5, # priority between 1(Highest) - 5(lowest)\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.3 Create the annotations payload" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "entities_annotation = lb_types.ObjectAnnotation(\n name=\"named_entity\",\n value=lb_types.DocumentEntity(name=\"named_entity\",\n textSelections=text_selections),\n)\n\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\")),\n)\n\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\n\nbbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\", # must match your ontology feature\"s name\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim_1[\"left\"],\n y=bbox_dim_1[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n ), # x= left + width , y = top + height\n page=0,\n unit=lb_types.RectangleUnit.POINTS,\n ),\n)\n\nnested_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\",)\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n 
name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",)),\n )\n ],\n )),\n)\n\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n\nbbox_dim = {\n \"top\": 226.757,\n \"left\": 317.271,\n \"height\": 194.229,\n \"width\": 249.386,\n}\n\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.DocumentRectangle(\n start=lb_types.Point(x=bbox_dim[\"left\"],\n y=bbox_dim[\"top\"]), # x = left, y = top\n end=lb_types.Point(\n x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n ), # x= left + width , y = top + height\n unit=lb_types.RectangleUnit.POINTS,\n page=1,\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"second_sub_radio_question\",\n value=lb_types.Radio(\n answer=lb_types.ClassificationAnswer(\n name=\"second_sub_radio_answer\")),\n )\n ],\n )),\n )\n ],\n)\n\nner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"ner_with_checklist_subclass\",\n value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n text_selections=text_selections_ner),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n)", + "cell_type": "code", "outputs": [], - "source": [ - "entities_annotation = lb_types.ObjectAnnotation(\n", - " name=\"named_entity\",\n", - " value=lb_types.DocumentEntity(name=\"named_entity\",\n", - " textSelections=text_selections),\n", - ")\n", - "\n", - "radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\")),\n", - ")\n", - "\n", - "checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n", - " lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n", - " ]),\n", - ")\n", - "\n", - "bbox_dim_1 = {\"top\": 135.3, \"left\": 102.771, \"height\": 109.843, \"width\": 415.8}\n", - "bbox_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bounding_box\", # must match your ontology feature\"s name\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim_1[\"left\"],\n", - " y=bbox_dim_1[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim_1[\"left\"] + bbox_dim_1[\"width\"],\n", - " y=bbox_dim_1[\"top\"] + bbox_dim_1[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " page=0,\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " ),\n", - ")\n", - "\n", - "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_checklist_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " 
value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(\n", - " name=\"first_sub_checklist_answer\",)\n", - " ]),\n", - " )\n", - " ],\n", - " )\n", - " ]),\n", - ")\n", - "\n", - "nested_radio_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"nested_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",)),\n", - " )\n", - " ],\n", - " )),\n", - ")\n", - "\n", - "text_annotation = lb_types.ClassificationAnnotation(\n", - " name=\"free_text\", value=lb_types.Text(answer=\"sample text\"))\n", - "\n", - "bbox_dim = {\n", - " \"top\": 226.757,\n", - " \"left\": 317.271,\n", - " \"height\": 194.229,\n", - " \"width\": 249.386,\n", - "}\n", - "\n", - "bbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"bbox_with_radio_subclass\",\n", - " value=lb_types.DocumentRectangle(\n", - " start=lb_types.Point(x=bbox_dim[\"left\"],\n", - " y=bbox_dim[\"top\"]), # x = left, y = top\n", - " end=lb_types.Point(\n", - " x=bbox_dim[\"left\"] + bbox_dim[\"width\"],\n", - " y=bbox_dim[\"top\"] + bbox_dim[\"height\"],\n", - " ), # x= left + width , y = top + height\n", - " unit=lb_types.RectangleUnit.POINTS,\n", - " page=1,\n", - " ),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_radio_question\",\n", - " value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n", - " name=\"first_sub_radio_answer\",\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"second_sub_radio_question\",\n", - " value=lb_types.Radio(\n", - " answer=lb_types.ClassificationAnswer(\n", - " name=\"second_sub_radio_answer\")),\n", - " )\n", - " ],\n", - " )),\n", - " )\n", - " ],\n", - ")\n", - "\n", - "ner_with_checklist_subclass_annotation = lb_types.ObjectAnnotation(\n", - " name=\"ner_with_checklist_subclass\",\n", - " value=lb_types.DocumentEntity(name=\"ner_with_checklist_subclass\",\n", - " text_selections=text_selections_ner),\n", - " classifications=[\n", - " lb_types.ClassificationAnnotation(\n", - " name=\"sub_checklist_question\",\n", - " value=lb_types.Checklist(answer=[\n", - " lb_types.ClassificationAnswer(name=\"first_sub_checklist_answer\")\n", - " ]),\n", - " )\n", - " ],\n", - ")" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.4 Create the label object " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "labels = []\n\nlabels.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[\n entities_annotation,\n checklist_annotation,\n nested_checklist_annotation,\n text_annotation,\n radio_annotation,\n nested_radio_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n ner_with_checklist_subclass_annotation,\n ],\n ))", + "cell_type": "code", "outputs": [], - "source": [ - "labels = []\n", - "\n", - "labels.append(\n", - " lb_types.Label(\n", - " data={\"global_key\": global_key},\n", - " annotations=[\n", - " entities_annotation,\n", - " checklist_annotation,\n", - " nested_checklist_annotation,\n", - " text_annotation,\n", - " radio_annotation,\n", - " nested_radio_annotation,\n", - " bbox_annotation,\n", - " bbox_with_radio_subclass_annotation,\n", - " 
ner_with_checklist_subclass_annotation,\n", - " ],\n", - " ))" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.5 Upload annotations to the project using Label import\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "upload_job_annotation = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"text_label_import_job\" + str(uuid.uuid4()),\n labels=labels,\n)\n\nupload_job_annotation.wait_until_done()\n# Errors will appear for annotation uploads that failed.\nprint(\"Errors:\", upload_job_annotation.errors)\nprint(\"Status of uploads: \", upload_job_annotation.statuses)", + "cell_type": "code", "outputs": [], - "source": [ - "upload_job_annotation = lb.LabelImport.create_from_objects(\n", - " client=client,\n", - " project_id=project.uid,\n", - " name=\"text_label_import_job\" + str(uuid.uuid4()),\n", - " labels=labels,\n", - ")\n", - "\n", - "upload_job_annotation.wait_until_done()\n", - "# Errors will appear for annotation uploads that failed.\n", - "print(\"Errors:\", upload_job_annotation.errors)\n", - "print(\"Status of uploads: \", upload_job_annotation.statuses)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "7.6 Send the annotations to the Model Run " - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# get the labels id from the project\nmodel_run.upsert_labels(project_id=project.uid)", + "cell_type": "code", "outputs": [], - "source": [ - "# get the labels id from the project\n", - "model_run.upsert_labels(project_id=project.uid)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Option deletions for cleanup" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# project.delete()\n# dataset.delete()", + "cell_type": "code", "outputs": [], - "source": [ - "# project.delete()\n", - "# dataset.delete()" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + ] +} \ No newline at end of file diff --git a/requirements-dev.lock b/requirements-dev.lock index d0f5cda..9e45e2c 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -11,10 +11,10 @@ annotated-types==0.7.0 # via pydantic -asttokens==2.4.1 +asttokens==3.0.0 # via stack-data -black==24.8.0 -click==8.1.7 +black==25.1.0 +click==8.1.8 # via black # via typer commonmark==0.9.1 @@ -22,84 +22,78 @@ commonmark==0.9.1 databooks==1.3.10 decorator==5.1.1 # via ipython -executing==2.1.0 +executing==2.2.0 # via stack-data -gitdb==4.0.11 +gitdb==4.0.12 # via gitpython -gitpython==3.1.43 +gitpython==3.1.44 # via databooks -importlib-metadata==8.5.0 - # via yapf -ipython==8.27.0 +ipython==8.32.0 # via black -jedi==0.19.1 +jedi==0.19.2 # via ipython matplotlib-inline==0.1.7 # via ipython mypy-extensions==1.0.0 # via black -numpy==2.1.1 +numpy==2.2.3 # via pandas -packaging==24.1 +packaging==24.2 # via black -pandas==2.2.2 +pandas==2.2.3 parso==0.8.4 # via jedi pathspec==0.12.1 # via black pexpect==4.9.0 # via ipython -platformdirs==4.3.2 +platformdirs==4.3.6 # via black # via yapf -prompt-toolkit==3.0.47 +prompt-toolkit==3.0.50 # via ipython ptyprocess==0.7.0 # via pexpect pure-eval==0.2.3 # via stack-data -pydantic==2.9.1 +pydantic==2.10.6 # via databooks -pydantic-core==2.23.3 
+pydantic-core==2.27.2 # via pydantic -pygments==2.18.0 +pygments==2.19.1 # via ipython # via rich python-dateutil==2.9.0.post0 # via pandas -pytz==2024.2 +pytz==2025.1 # via pandas rich==12.6.0 # via databooks # via typer shellingham==1.5.4 # via typer -six==1.16.0 - # via asttokens +six==1.17.0 # via python-dateutil -smmap==5.0.1 +smmap==5.0.2 # via gitdb stack-data==0.6.3 # via ipython -tokenize-rt==6.0.0 +tokenize-rt==6.1.0 # via black -tomli==2.0.1 +tomli==2.2.1 # via databooks - # via yapf traitlets==5.14.3 # via ipython # via matplotlib-inline -typer==0.12.5 +typer==0.15.1 # via databooks typing-extensions==4.12.2 # via databooks # via pydantic # via pydantic-core # via typer -tzdata==2024.1 +tzdata==2025.1 # via pandas wcwidth==0.2.13 # via prompt-toolkit -yapf==0.40.2 -zipp==3.20.1 - # via importlib-metadata +yapf==0.43.0