diff --git a/examples/CONTRIBUTING.md b/examples/CONTRIBUTING.md
index bb7dd5999..b747cd104 100644
--- a/examples/CONTRIBUTING.md
+++ b/examples/CONTRIBUTING.md
@@ -12,7 +12,7 @@ Thank you for contributing to our notebook examples! To ensure that your contrib
 
 ## General Notebook Requirements
 
-Review our [template notebook](template.ipynbs) for general overview on how notebooks should be structure. This notebook and section just serves as a guide and exceptions can be made. Here are our general requirements:
+Review our [template notebook](template.ipynb) for a general overview of how notebooks should be structured. The template notebook and this section serve only as a guide; exceptions can be made. Here are our general requirements:
 
 1. Ensure that any modified notebooks run when edited.
 2. Ensure that you update any relevant headers and comments within the code block you may add or change.
diff --git a/examples/README.md b/examples/README.md
index 6e8faebec..8abd54521 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -153,11 +153,6 @@
       Open In Github
       Open In Colab
-
-      Import Labeled Dataset Image
-      Open In Github
-      Open In Colab
-
       PDF
       Open In Github
       Open In Colab
diff --git a/examples/annotation_import/import_labeled_dataset_image.ipynb b/examples/annotation_import/import_labeled_dataset_image.ipynb
deleted file mode 100644
index 9ebd52883..000000000
--- a/examples/annotation_import/import_labeled_dataset_image.ipynb
+++ /dev/null
@@ -1,185 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {},
- "cells": [
-  {
-   "metadata": {},
-   "source": [
-    "",
-    " ",
-    "\n"
-   ],
-   "cell_type": "markdown"
-  },
-  {
-   "metadata": {},
-   "source": [
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    ""
-   ],
-   "cell_type": "markdown"
-  },
-  {
-   "metadata": {},
-   "source": "%pip install -q \"labelbox[data]\"",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": "import labelbox as lb\nfrom labelbox.schema.data_row_metadata import (\n    DataRowMetadataField,\n    DataRowMetadataKind,\n)\nimport datetime\nimport random\nimport os\nimport json\nfrom PIL import Image\nfrom labelbox.schema.ontology import OntologyBuilder, Tool\nimport requests\nfrom tqdm.notebook import tqdm\nimport uuid\nfrom labelbox.data.annotation_types import (\n    Label,\n    ImageData,\n    ObjectAnnotation,\n    Rectangle,\n    Point,\n)",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": [
-    "# Setup Labelbox client"
-   ],
-   "cell_type": "markdown"
-  },
-  {
-   "metadata": {},
-   "source": "# Initialize the Labelbox client\nAPI_KEY = \"\"\nclient = lb.Client(API_KEY)",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": [
-    "# Download a public dataset\n"
-   ],
-   "cell_type": "markdown"
-  },
-  {
-   "metadata": {},
-   "source": "# Function to download files\ndef download_files(filemap):\n    path, uri = filemap\n    if not os.path.exists(path):\n        response = requests.get(uri, stream=True)\n        with open(path, \"wb\") as f:\n            for chunk in response.iter_content(chunk_size=8192):\n                f.write(chunk)\n    return path",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": "# Download data rows and annotations\nDATA_ROWS_URL = \"https://storage.googleapis.com/labelbox-datasets/VHR_geospatial/geospatial_datarows.json\"\nANNOTATIONS_URL = \"https://storage.googleapis.com/labelbox-datasets/VHR_geospatial/geospatial_annotations.json\"\ndownload_files((\"data_rows.json\", DATA_ROWS_URL))\ndownload_files((\"annotations.json\", ANNOTATIONS_URL))",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": "# Load data rows and annotations\nwith open(\"data_rows.json\") as fp:\n    data_rows = json.load(fp)\nwith open(\"annotations.json\") as fp:\n    annotations = json.load(fp)",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": [
-    "# Create a dataset"
-   ],
-   "cell_type": "markdown"
-  },
-  {
-   "metadata": {},
-   "source": "# Create a new dataset\ndataset = client.create_dataset(name=\"Geospatial vessel detection\")\nprint(f\"Created dataset with ID: {dataset.uid}\")",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": [
-    "# Import Data Rows with Metadata"
-   ],
-   "cell_type": "markdown"
-  },
-  {
-   "metadata": {},
-   "source": "# Here is an example of adding two metadata fields to your Data Rows: a \"captureDateTime\" field with datetime value, and a \"tag\" field with string value\nmetadata_ontology = client.get_data_row_metadata_ontology()\ndatetime_schema_id = metadata_ontology.reserved_by_name[\"captureDateTime\"].uid\ntag_schema_id = metadata_ontology.reserved_by_name[\"tag\"].uid\ntag_items = [\"WorldView-1\", \"WorldView-2\", \"WorldView-3\", \"WorldView-4\"]\n\nfor datarow in tqdm(data_rows):\n    dt = datetime.datetime.utcnow() + datetime.timedelta(\n        days=random.random() * 30)  # this is random datetime value\n    tag_item = random.choice(tag_items)  # this is a random tag value\n\n    # Option 1: Specify metadata with a list of DataRowMetadataField. This is the recommended option since it comes with validation for metadata fields.\n    metadata_fields = [\n        DataRowMetadataField(schema_id=datetime_schema_id, value=dt),\n        DataRowMetadataField(schema_id=tag_schema_id, value=tag_item),\n    ]\n\n    # Option 2: Uncomment to try. Alternatively, you can specify the metadata fields with dictionary format without declaring the DataRowMetadataField objects. It is equivalent to Option 1.\n    # metadata_fields = [\n    #   {\"schema_id\": datetime_schema_id, \"value\": dt},\n    #   {\"schema_id\": tag_schema_id, \"value\": tag_item}\n    # ]\n\n    datarow[\"metadata_fields\"] = metadata_fields",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": "task = dataset.create_data_rows(data_rows)\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n    for error in task.errors:\n        if (\"Duplicate global key\" in error[\"message\"] and\n                dataset.row_count == 0):\n            # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n            print(f\"Deleting empty dataset: {dataset}\")\n            dataset.delete()",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": [
-    "Examine a Data Row"
-   ],
-   "cell_type": "markdown"
-  },
-  {
-   "metadata": {},
-   "source": "datarow = next(dataset.data_rows())\nprint(datarow)",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": [
-    "# Setup a labeling project"
-   ],
-   "cell_type": "markdown"
-  },
-  {
-   "metadata": {},
-   "source": "# Initialize the OntologyBuilder\nontology_builder = OntologyBuilder()\n\n# Assuming 'annotations' is defined and contains the necessary data\nfor category in annotations[\"categories\"]:\n    print(category[\"name\"])\n    # Add tools to the ontology builder\n    ontology_builder.add_tool(Tool(tool=Tool.Type.BBOX, name=category[\"name\"]))\n\n# Create the ontology in Labelbox\nontology = client.create_ontology(\n    \"Vessel Detection Ontology\",\n    ontology_builder.asdict(),\n    media_type=lb.MediaType.Image,\n)\nprint(f\"Created ontology with ID: {ontology.uid}\")\n\n# Create a project and set up the ontology\nproject = client.create_project(name=\"Vessel Detection\",\n                                media_type=lb.MediaType.Image)\nproject.setup_editor(ontology=ontology)\nprint(f\"Created project with ID: {project.uid}\")",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": [
-    "# Send a batch of data rows to the project"
-   ],
-   "cell_type": "markdown"
-  },
-  {
-   "metadata": {},
-   "source": "client.enable_experimental = True\n\n# Minimal ExportV2 parameters focused solely on data row IDs\nexport_params = {\"data_row_details\": True}  # Only export data row details\n\n# Initiate the streamable export task from catalog\ndataset = client.get_dataset(dataset.uid)  # Update with the actual dataset ID\nexport_task = dataset.export(params=export_params)\nexport_task.wait_till_done()\nprint(export_task)\n\ndata_rows = []\n\n\n# Callback used for JSON Converter to correctly collect data row IDs\ndef json_stream_handler(output: lb.JsonConverterOutput):\n    # Parse the JSON string to access the data\n    data = json.loads(output.json_str)\n\n    # Correctly extract and append DataRow ID\n    if \"data_row\" in data and \"id\" in data[\"data_row\"]:\n        data_rows.append(data[\"data_row\"][\"id\"])\n\n\n# Process the stream if there are results\nif export_task.has_result():\n    export_task.get_stream(converter=lb.JsonConverter(),\n                           stream_type=lb.StreamType.RESULT).start(\n                               stream_handler=json_stream_handler)\n\n# Randomly select 200 Data Rows (or fewer if the dataset has less than 200 data rows)\nsampled_data_rows = random.sample(data_rows, min(len(data_rows), 200))\n\n# Create a new batch in the project and add the sampled data rows\nbatch = project.create_batch(\n    \"Initial batch\",  # name of the batch\n    sampled_data_rows,  # list of Data Rows\n    1,  # priority between 1-5\n)\nprint(f\"Created batch with ID: {batch.uid}\")",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": [
-    "# Create annotations payload"
-   ],
-   "cell_type": "markdown"
-  },
-  {
-   "metadata": {},
-   "source": "# Set export parameters focused on data row details\nexport_params = {\n    \"data_row_details\": True,  # Only export data row details\n    \"batch_ids\": [batch.uid\n                 ],  # Optional: Include batch ids to filter by specific batches\n}\n\n# Initialize the streamable export task from project\nexport_task = project.export(params=export_params)\nexport_task.wait_till_done()\n\ndata_rows = []\n\n\ndef json_stream_handler(output: lb.JsonConverterOutput):\n    data_row = json.loads(output.json_str)\n    data_rows.append(data_row)\n\n\nif export_task.has_errors():\n    export_task.get_stream(converter=lb.JsonConverter(),\n                           stream_type=lb.StreamType.ERRORS).start(\n                               stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n    export_json = export_task.get_stream(\n        converter=lb.JsonConverter(), stream_type=lb.StreamType.RESULT).start(\n            stream_handler=json_stream_handler)\n\nlabels = []\nfor datarow in data_rows:\n    annotations_list = []\n    # Access the 'data_row' dictionary first\n    data_row_dict = datarow[\"data_row\"]\n    folder = data_row_dict[\"external_id\"].split(\"/\")[0]\n    id = data_row_dict[\"external_id\"].split(\"/\")[1]\n\n    if folder == \"positive_image_set\":\n        for image in annotations[\"images\"]:\n            if image[\"file_name\"] == id:\n                for annotation in annotations[\"annotations\"]:\n                    if annotation[\"image_id\"] == image[\"id\"]:\n                        bbox = annotation[\"bbox\"]\n                        category_id = annotation[\"category_id\"] - 1\n                        class_name = None\n                        ontology = (ontology_builder.asdict()\n                                   )  # Get the ontology dictionary\n                        for category in ontology[\"tools\"]:\n                            if (category[\"name\"] == annotations[\"categories\"]\n                                [category_id][\"name\"]):\n                                class_name = category[\"name\"]\n                                break\n                        if class_name:\n                            annotations_list.append(\n                                ObjectAnnotation(\n                                    name=class_name,\n                                    value=Rectangle(\n                                        start=Point(x=bbox[0], y=bbox[1]),\n                                        end=Point(\n                                            x=bbox[2] + bbox[0],\n                                            y=bbox[3] + bbox[1],\n                                        ),\n                                    ),\n                                ))\n    image_data = {\"uid\": data_row_dict[\"id\"]}\n    labels.append(Label(data=image_data, annotations=annotations_list))",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  },
-  {
-   "metadata": {},
-   "source": "upload_job = lb.LabelImport.create_from_objects(\n    client=client,\n    project_id=project.uid,\n    name=f\"label_import_job_{str(uuid.uuid4())}\",\n    labels=labels,\n)\n\n# Wait for the upload to finish and print the results\nupload_job.wait_until_done()\n\nprint(f\"Errors: {upload_job.errors}\")\nprint(f\"Status of uploads: {upload_job.statuses}\")",
-   "cell_type": "code",
-   "outputs": [],
-   "execution_count": null
-  }
- ]
-}
\ No newline at end of file
diff --git a/examples/pyproject.toml b/examples/pyproject.toml
index 05b4ed0af..152e59473 100644
--- a/examples/pyproject.toml
+++ b/examples/pyproject.toml
@@ -5,7 +5,7 @@ description = "Labelbox Python Example Notebooks"
 authors = [{ name = "Labelbox", email = "docs@labelbox.com" }]
 readme = "README.md"
 # Python version matches labelbox SDK
-requires-python = ">=3."
+requires-python = ">=3.8"
 dependencies = []
 
 [project.urls]
diff --git a/examples/template.ipynb b/examples/template.ipynb
index 6a052a9b6..0411a416b 100644
--- a/examples/template.ipynb
+++ b/examples/template.ipynb
@@ -101,7 +101,7 @@
  {
   "metadata": {},
   "source": [
-   "* Include information on step"
+   "* Include information about the step"
   ],
   "cell_type": "markdown"
  },