From 861306c6dfa80c53be468d384dba3f7304e64530 Mon Sep 17 00:00:00 2001
From: Gabefire <33893811+Gabefire@users.noreply.github.com>
Date: Fri, 24 May 2024 16:49:01 -0500
Subject: [PATCH 1/4] removed unneeded notebook; this is the last one that needs to go
---
 .../import_labeled_dataset_image.ipynb | 185 ------------------
 examples/pyproject.toml | 2 +-
 2 files changed, 1 insertion(+), 186 deletions(-)
 delete mode 100644 examples/annotation_import/import_labeled_dataset_image.ipynb

diff --git a/examples/annotation_import/import_labeled_dataset_image.ipynb b/examples/annotation_import/import_labeled_dataset_image.ipynb
deleted file mode 100644
index 9ebd52883..000000000
--- a/examples/annotation_import/import_labeled_dataset_image.ipynb
+++ /dev/null
@@ -1,185 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {},
- "cells": [
- {
- "metadata": {},
- "source": [
- "",
- " ",
- "\n"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": [
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- "\n",
- ""
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": "%pip install -q \"labelbox[data]\"",
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": "import labelbox as lb\nfrom labelbox.schema.data_row_metadata import (\n DataRowMetadataField,\n DataRowMetadataKind,\n)\nimport datetime\nimport random\nimport os\nimport json\nfrom PIL import Image\nfrom labelbox.schema.ontology import OntologyBuilder, Tool\nimport requests\nfrom tqdm.notebook import tqdm\nimport uuid\nfrom labelbox.data.annotation_types import (\n Label,\n ImageData,\n ObjectAnnotation,\n Rectangle,\n Point,\n)",
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Setup Labelbox client"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": "# Initialize the Labelbox client\nAPI_KEY = \"\"\nclient = lb.Client(API_KEY)",
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Download a public dataset\n"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": "# Function to download files\ndef download_files(filemap):\n path, uri = filemap\n if not os.path.exists(path):\n response = requests.get(uri, stream=True)\n with open(path, \"wb\") as f:\n for chunk in response.iter_content(chunk_size=8192):\n f.write(chunk)\n return path",
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": "# Download data rows and annotations\nDATA_ROWS_URL = \"https://storage.googleapis.com/labelbox-datasets/VHR_geospatial/geospatial_datarows.json\"\nANNOTATIONS_URL = \"https://storage.googleapis.com/labelbox-datasets/VHR_geospatial/geospatial_annotations.json\"\ndownload_files((\"data_rows.json\", DATA_ROWS_URL))\ndownload_files((\"annotations.json\", ANNOTATIONS_URL))",
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": "# Load data rows and annotations\nwith open(\"data_rows.json\") as fp:\n data_rows = json.load(fp)\nwith open(\"annotations.json\") as fp:\n annotations = json.load(fp)",
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": [
- "# Create a dataset"
- ],
- "cell_type": "markdown"
- },
- {
- "metadata": {},
- "source": "# Create a new dataset\ndataset = client.create_dataset(name=\"Geospatial vessel detection\")\nprint(f\"Created dataset with ID: 
{dataset.uid}\")", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Import Data Rows with Metadata" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Here is an example of adding two metadata fields to your Data Rows: a \"captureDateTime\" field with datetime value, and a \"tag\" field with string value\nmetadata_ontology = client.get_data_row_metadata_ontology()\ndatetime_schema_id = metadata_ontology.reserved_by_name[\"captureDateTime\"].uid\ntag_schema_id = metadata_ontology.reserved_by_name[\"tag\"].uid\ntag_items = [\"WorldView-1\", \"WorldView-2\", \"WorldView-3\", \"WorldView-4\"]\n\nfor datarow in tqdm(data_rows):\n dt = datetime.datetime.utcnow() + datetime.timedelta(\n days=random.random() * 30) # this is random datetime value\n tag_item = random.choice(tag_items) # this is a random tag value\n\n # Option 1: Specify metadata with a list of DataRowMetadataField. This is the recommended option since it comes with validation for metadata fields.\n metadata_fields = [\n DataRowMetadataField(schema_id=datetime_schema_id, value=dt),\n DataRowMetadataField(schema_id=tag_schema_id, value=tag_item),\n ]\n\n # Option 2: Uncomment to try. Alternatively, you can specify the metadata fields with dictionary format without declaring the DataRowMetadataField objects. It is equivalent to Option 1.\n # metadata_fields = [\n # {\"schema_id\": datetime_schema_id, \"value\": dt},\n # {\"schema_id\": tag_schema_id, \"value\": tag_item}\n # ]\n\n datarow[\"metadata_fields\"] = metadata_fields", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": "task = dataset.create_data_rows(data_rows)\ntask.wait_till_done()\nprint(f\"Failed data rows: {task.failed_data_rows}\")\nprint(f\"Errors: {task.errors}\")\n\nif task.errors:\n for error in task.errors:\n if (\"Duplicate global key\" in error[\"message\"] and\n dataset.row_count == 0):\n # If the global key already exists in the workspace the dataset will be created empty, so we can delete it.\n print(f\"Deleting empty dataset: {dataset}\")\n dataset.delete()", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "Examine a Data Row" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "datarow = next(dataset.data_rows())\nprint(datarow)", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Setup a labeling project" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Initialize the OntologyBuilder\nontology_builder = OntologyBuilder()\n\n# Assuming 'annotations' is defined and contains the necessary data\nfor category in annotations[\"categories\"]:\n print(category[\"name\"])\n # Add tools to the ontology builder\n ontology_builder.add_tool(Tool(tool=Tool.Type.BBOX, name=category[\"name\"]))\n\n# Create the ontology in Labelbox\nontology = client.create_ontology(\n \"Vessel Detection Ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)\nprint(f\"Created ontology with ID: {ontology.uid}\")\n\n# Create a project and set up the ontology\nproject = client.create_project(name=\"Vessel Detection\",\n media_type=lb.MediaType.Image)\nproject.setup_editor(ontology=ontology)\nprint(f\"Created project with ID: {project.uid}\")", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Send a batch of data 
rows to the project" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "client.enable_experimental = True\n\n# Minimal ExportV2 parameters focused solely on data row IDs\nexport_params = {\"data_row_details\": True} # Only export data row details\n\n# Initiate the streamable export task from catalog\ndataset = client.get_dataset(dataset.uid) # Update with the actual dataset ID\nexport_task = dataset.export(params=export_params)\nexport_task.wait_till_done()\nprint(export_task)\n\ndata_rows = []\n\n\n# Callback used for JSON Converter to correctly collect data row IDs\ndef json_stream_handler(output: lb.JsonConverterOutput):\n # Parse the JSON string to access the data\n data = json.loads(output.json_str)\n\n # Correctly extract and append DataRow ID\n if \"data_row\" in data and \"id\" in data[\"data_row\"]:\n data_rows.append(data[\"data_row\"][\"id\"])\n\n\n# Process the stream if there are results\nif export_task.has_result():\n export_task.get_stream(converter=lb.JsonConverter(),\n stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\n# Randomly select 200 Data Rows (or fewer if the dataset has less than 200 data rows)\nsampled_data_rows = random.sample(data_rows, min(len(data_rows), 200))\n\n# Create a new batch in the project and add the sampled data rows\nbatch = project.create_batch(\n \"Initial batch\", # name of the batch\n sampled_data_rows, # list of Data Rows\n 1, # priority between 1-5\n)\nprint(f\"Created batch with ID: {batch.uid}\")", - "cell_type": "code", - "outputs": [], - "execution_count": null - }, - { - "metadata": {}, - "source": [ - "# Create annotations payload" - ], - "cell_type": "markdown" - }, - { - "metadata": {}, - "source": "# Set export parameters focused on data row details\nexport_params = {\n \"data_row_details\": True, # Only export data row details\n \"batch_ids\": [batch.uid\n ], # Optional: Include batch ids to filter by specific batches\n}\n\n# Initialize the streamable export task from project\nexport_task = project.export(params=export_params)\nexport_task.wait_till_done()\n\ndata_rows = []\n\n\ndef json_stream_handler(output: lb.JsonConverterOutput):\n data_row = json.loads(output.json_str)\n data_rows.append(data_row)\n\n\nif export_task.has_errors():\n export_task.get_stream(converter=lb.JsonConverter(),\n stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_stream(\n converter=lb.JsonConverter(), stream_type=lb.StreamType.RESULT).start(\n stream_handler=json_stream_handler)\n\nlabels = []\nfor datarow in data_rows:\n annotations_list = []\n # Access the 'data_row' dictionary first\n data_row_dict = datarow[\"data_row\"]\n folder = data_row_dict[\"external_id\"].split(\"/\")[0]\n id = data_row_dict[\"external_id\"].split(\"/\")[1]\n\n if folder == \"positive_image_set\":\n for image in annotations[\"images\"]:\n if image[\"file_name\"] == id:\n for annotation in annotations[\"annotations\"]:\n if annotation[\"image_id\"] == image[\"id\"]:\n bbox = annotation[\"bbox\"]\n category_id = annotation[\"category_id\"] - 1\n class_name = None\n ontology = (ontology_builder.asdict()\n ) # Get the ontology dictionary\n for category in ontology[\"tools\"]:\n if (category[\"name\"] == annotations[\"categories\"]\n [category_id][\"name\"]):\n class_name = category[\"name\"]\n break\n if class_name:\n annotations_list.append(\n ObjectAnnotation(\n name=class_name,\n value=Rectangle(\n start=Point(x=bbox[0], 
y=bbox[1]),\n end=Point(\n x=bbox[2] + bbox[0],\n y=bbox[3] + bbox[1],\n ),\n ),\n ))\n image_data = {\"uid\": data_row_dict[\"id\"]}\n labels.append(Label(data=image_data, annotations=annotations_list))",
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- },
- {
- "metadata": {},
- "source": "upload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"label_import_job_{str(uuid.uuid4())}\",\n labels=labels,\n)\n\n# Wait for the upload to finish and print the results\nupload_job.wait_until_done()\n\nprint(f\"Errors: {upload_job.errors}\")\nprint(f\"Status of uploads: {upload_job.statuses}\")",
- "cell_type": "code",
- "outputs": [],
- "execution_count": null
- }
- ]
-}
\ No newline at end of file
diff --git a/examples/pyproject.toml b/examples/pyproject.toml
index 05b4ed0af..152e59473 100644
--- a/examples/pyproject.toml
+++ b/examples/pyproject.toml
@@ -5,7 +5,7 @@ description = "Labelbox Python Example Notebooks"
 authors = [{ name = "Labelbox", email = "docs@labelbox.com" }]
 readme = "README.md"
 # Python version matches labelbox SDK
-requires-python = ">=3."
+requires-python = ">=3.8"
 dependencies = []

 [project.urls]

From a83049d343e5e33ec488790ad042ed686876ae83 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Fri, 24 May 2024 21:51:33 +0000
Subject: [PATCH 2/4] :memo: README updated
---
 examples/README.md | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/examples/README.md b/examples/README.md
index 6e8faebec..8abd54521 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -153,11 +153,6 @@
 Open In Github
 Open In Colab
-
- Import Labeled Dataset Image
- Open In Github
- Open In Colab
-
 PDF
 Open In Github

From 817d8d838a2ed0f8c85900fcd2bbb307a29a8ec8 Mon Sep 17 00:00:00 2001
From: Gabefire <33893811+Gabefire@users.noreply.github.com>
Date: Fri, 24 May 2024 17:24:11 -0500
Subject: [PATCH 3/4] made typo fix and committing for actions
---
 examples/CONTRIBUTING.md | 2 +-
 examples/template.ipynb | 112 ++++++++++++++++++++++++---------------
 2 files changed, 70 insertions(+), 44 deletions(-)

diff --git a/examples/CONTRIBUTING.md b/examples/CONTRIBUTING.md
index bb7dd5999..b747cd104 100644
--- a/examples/CONTRIBUTING.md
+++ b/examples/CONTRIBUTING.md
@@ -12,7 +12,7 @@ Thank you for contributing to our notebook examples! To ensure that your contrib

## General Notebook Requirements

Review our [template notebook](template.ipynbs) for general overview on how notebooks should be structure. The template notebook and this section just serves as a guide and exceptions can be made. Here are our general requirements:

1. Ensure that any modified notebooks run when edited.
2. Ensure that you update any relevant headers and comments within the code block you may add or change. 
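Requirement 1 above — that any modified notebook still runs — can be checked locally before pushing. Below is a minimal sketch using the standard `nbformat` and `nbclient` Jupyter packages; it is one illustrative way to run that check under those assumptions, not part of the repository's actual tooling or CI, and the `check_notebooks.py` name is hypothetical.

```python
# Hypothetical local check for CONTRIBUTING.md requirement 1:
# "Ensure that any modified notebooks run when edited."
# Assumes `pip install nbformat nbclient` and an installed python3 kernel;
# illustrative sketch only, not part of the labelbox-python repo.
import sys

import nbformat
from nbclient import NotebookClient


def notebook_runs(path: str) -> bool:
    """Execute every cell of the notebook; True means it ran cleanly."""
    nb = nbformat.read(path, as_version=4)
    client = NotebookClient(nb, timeout=600, kernel_name="python3")
    try:
        client.execute()
        return True
    except Exception as exc:  # e.g. nbclient.exceptions.CellExecutionError
        print(f"{path} failed: {exc}", file=sys.stderr)
        return False


if __name__ == "__main__":
    # Usage (hypothetical): python check_notebooks.py examples/template.ipynb
    sys.exit(0 if all(map(notebook_runs, sys.argv[1:])) else 1)
```

Running something like this against every notebook touched in a PR surfaces broken cells before review, which is exactly what requirement 1 asks for.
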
diff --git a/examples/template.ipynb b/examples/template.ipynb index 6a052a9b6..53c17169f 100644 --- a/examples/template.ipynb +++ b/examples/template.ipynb @@ -1,18 +1,16 @@ { - "nbformat": 4, - "nbformat_minor": 2, - "metadata": {}, "cells": [ { + "cell_type": "markdown", "metadata": {}, "source": [ - "", - " ", + "\n", + " \n", "\n" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -24,110 +22,138 @@ "\n", "" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"\n# Always install \"labelbox[data]\" over labelbox to ensure dependencies are installed correctly\n# Include other installs in this block with similar format: \"%pip install -q \"", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "%pip install -q \"labelbox[data]\"\n", + "# Always install \"labelbox[data]\" over labelbox to ensure dependencies are installed correctly\n", + "# Include other installs in this block with similar format: \"%pip install -q \"" + ] }, { - "metadata": {}, - "source": "import labelbox as lb\n\n# Include other imports in this block try grouping similar imports together", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "import labelbox as lb\n", + "\n", + "# Include other imports in this block try grouping similar imports together" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## API key and client\n", "Provide a valid API key below in order to properly connect to the Labelbox client. Please review [Create API key guide](https://docs.labelbox.com/reference/create-api-key) for more information." - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# Add your API key\nAPI_KEY = None\n# To get your API key go to: Workspace settings -> API -> Create API Key\nclient = lb.Client(api_key=API_KEY)", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# Add your API key\n", + "API_KEY = None\n", + "# To get your API key go to: Workspace settings -> API -> Create API Key\n", + "client = lb.Client(api_key=API_KEY)" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Main section header\n", "\n", "* Include information on what section discusses" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Sub section header\n", "\n", "* Include information on what sub section discusses\n", "* If parameters are needed specify they need included" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "DATASET_ID = None\n# Inline Parameters should be all caps with a default value typically None", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "DATASET_ID = None\n", + "# Inline Parameters should be all caps with a default value typically None" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Notebook section with steps\n", "\n", "* For long winded processes step headers should be included" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "### Step \"step number\"" - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ - "* Include information on step" - ], - "cell_type": "markdown" + "* Include information about step" 
+ ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "#### \"step number.sub step number\"\n", "\n", "* Some steps need sub steps and should be labeled like above. For example, 7.1, 7.2, etc." - ], - "cell_type": "markdown" + ] }, { + "cell_type": "markdown", "metadata": {}, "source": [ "## Clean up\n", "Uncomment and run the cell below to optionally delete Labelbox objects created" - ], - "cell_type": "markdown" + ] }, { - "metadata": {}, - "source": "# batch.delete()\n# project.delete()\n# dataset.delete()\n\n## Include other objects that were created", "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "# batch.delete()\n", + "# project.delete()\n", + "# dataset.delete()\n", + "\n", + "## Include other objects that were created" + ] } - ] -} \ No newline at end of file + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 2457b26e7ada2a3b822a95b0c41e4e8966f71eba Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 24 May 2024 22:25:02 +0000 Subject: [PATCH 4/4] :art: Cleaned --- examples/template.ipynb | 110 +++++++++++++++------------------------- 1 file changed, 42 insertions(+), 68 deletions(-) diff --git a/examples/template.ipynb b/examples/template.ipynb index 53c17169f..0411a416b 100644 --- a/examples/template.ipynb +++ b/examples/template.ipynb @@ -1,16 +1,18 @@ { + "nbformat": 4, + "nbformat_minor": 2, + "metadata": {}, "cells": [ { - "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - " \n", + "", + " ", "\n" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "\n", @@ -22,138 +24,110 @@ "\n", "" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "%pip install -q \"labelbox[data]\"\n# Always install \"labelbox[data]\" over labelbox to ensure dependencies are installed correctly\n# Include other installs in this block with similar format: \"%pip install -q \"", + "cell_type": "code", "outputs": [], - "source": [ - "%pip install -q \"labelbox[data]\"\n", - "# Always install \"labelbox[data]\" over labelbox to ensure dependencies are installed correctly\n", - "# Include other installs in this block with similar format: \"%pip install -q \"" - ] + "execution_count": null }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "import labelbox as lb\n\n# Include other imports in this block try grouping similar imports together", + "cell_type": "code", "outputs": [], - "source": [ - "import labelbox as lb\n", - "\n", - "# Include other imports in this block try grouping similar imports together" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## API key and client\n", "Provide a valid API key below in order to properly connect to the Labelbox client. Please review [Create API key guide](https://docs.labelbox.com/reference/create-api-key) for more information." 
- ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# Add your API key\nAPI_KEY = None\n# To get your API key go to: Workspace settings -> API -> Create API Key\nclient = lb.Client(api_key=API_KEY)", + "cell_type": "code", "outputs": [], - "source": [ - "# Add your API key\n", - "API_KEY = None\n", - "# To get your API key go to: Workspace settings -> API -> Create API Key\n", - "client = lb.Client(api_key=API_KEY)" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Main section header\n", "\n", "* Include information on what section discusses" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Sub section header\n", "\n", "* Include information on what sub section discusses\n", "* If parameters are needed specify they need included" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "DATASET_ID = None\n# Inline Parameters should be all caps with a default value typically None", + "cell_type": "code", "outputs": [], - "source": [ - "DATASET_ID = None\n", - "# Inline Parameters should be all caps with a default value typically None" - ] + "execution_count": null }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Notebook section with steps\n", "\n", "* For long winded processes step headers should be included" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "### Step \"step number\"" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "* Include information about step" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "#### \"step number.sub step number\"\n", "\n", "* Some steps need sub steps and should be labeled like above. For example, 7.1, 7.2, etc." - ] + ], + "cell_type": "markdown" }, { - "cell_type": "markdown", "metadata": {}, "source": [ "## Clean up\n", "Uncomment and run the cell below to optionally delete Labelbox objects created" - ] + ], + "cell_type": "markdown" }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "source": "# batch.delete()\n# project.delete()\n# dataset.delete()\n\n## Include other objects that were created", + "cell_type": "code", "outputs": [], - "source": [ - "# batch.delete()\n", - "# project.delete()\n", - "# dataset.delete()\n", - "\n", - "## Include other objects that were created" - ] + "execution_count": null } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + ] +} \ No newline at end of file
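
The closing `:art: Cleaned` commit restores `template.ipynb` to the layout the repository's cleanup action appears to enforce: top-level keys ordered `nbformat`, `nbformat_minor`, `metadata`, `cells`; each code cell's `source` collapsed to a single string with `outputs` emptied and `execution_count` nulled; and no trailing newline (hence the `\ No newline at end of file` markers). A rough sketch of that normalization follows; the key ordering is inferred from the diff itself, and the real GitHub Action may implement it differently.

```python
# Sketch of the notebook normalization implied by the ":art: Cleaned" diff.
# Key ordering is inferred from the patch above -- an assumption, not the
# formatting action's actual source.
import json

# Cell keys in the order the cleaned template.ipynb shows them.
CELL_KEY_ORDER = ["metadata", "source", "cell_type", "outputs", "execution_count"]


def clean_notebook(path: str) -> None:
    with open(path) as fp:
        nb = json.load(fp)

    cells = []
    for cell in nb["cells"]:
        if cell["cell_type"] == "code":
            # Collapse list-of-lines source into one string and drop any
            # previous execution state, as the cleaned diff does.
            if isinstance(cell.get("source"), list):
                cell["source"] = "".join(cell["source"])
            cell["outputs"] = []
            cell["execution_count"] = None
        cells.append({k: cell[k] for k in CELL_KEY_ORDER if k in cell})

    out = {
        "nbformat": nb["nbformat"],
        "nbformat_minor": nb["nbformat_minor"],
        "metadata": {},  # the cleaned template carries empty top-level metadata
        "cells": cells,
    }
    with open(path, "w") as fp:
        # json.dump writes no trailing newline, matching the
        # "\ No newline at end of file" marker in the patch.
        json.dump(out, fp, indent=1)


if __name__ == "__main__":
    clean_notebook("examples/template.ipynb")
```

Normalizing notebooks this way keeps diffs reviewable: execution counts and outputs never churn, and every cell serializes identically no matter which editor last touched it — which is why PATCH 4/4 is pure formatting with no semantic change.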