diff --git a/examples/README.md b/examples/README.md
index fdf1907e7..faf0b39a2 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -85,6 +85,11 @@
 |
 |
+| Exporting to CSV |  |  |
diff --git a/examples/exports/exporting_to_csv.ipynb b/examples/exports/exporting_to_csv.ipynb
new file mode 100644
index 000000000..80d906c37
--- /dev/null
+++ b/examples/exports/exporting_to_csv.ipynb
@@ -0,0 +1,366 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 2,
+ "metadata": {},
+ "cells": [
+ {
+ "metadata": {},
+ "source": [
+ "<!-- Labelbox logo banner -->"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "<!-- Open in Colab / View on GitHub badge links -->"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "# Export to CSV or Pandas format\n",
+ "\n",
+ "This notebook serves as a simplified How-To guide and provides examples of converting Labelbox export JSON to a CSV and [Pandas](https://pandas.pydata.org/) friendly format. "
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Advance approach\n",
+ "\n",
+ "For a more abstract approach, please visit our [LabelPandas](https://github.com/Labelbox/labelpandas) library. You can use this library to abstract the steps to be shown. In addition, this library supports importing CSV data. \n",
+ "\n",
+ "We strongly encourage collaboration - please feel free to fork this repo and tweak the code base to work for your own data, and make pull requests if you have suggestions on how to enhance the overall experience, add new features, or improve general performance."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Set up"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "%pip install -q --upgrade \"Labelbox[data]\"\n%pip install -q pandas",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid\nfrom pprint import pprint\nimport csv\nimport pandas as pd",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## API key and client\n",
+ "Provide a valid API key below to connect to the Labelbox client properly. For more information, please review the [Create API Key](https://docs.labelbox.com/reference/create-api-key) guide."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "API_KEY = None\nclient = lb.Client(api_key=API_KEY)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Create or select example project\n",
+ "\n",
+ "The below steps will set up a project that can be used for this demo. Please feel free to delete the code block below and uncomment the code block that fetches your own project directly. For more information on this setup, visit our [quick start guide](https://docs.labelbox.com/reference/quick-start)."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Create Project"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "# Create dataset with image data row\nglobal_key = str(uuid.uuid4())\n\ntest_img_url = {\n \"row_data\":\n \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n \"global_key\":\n global_key,\n}\n\ndataset = client.create_dataset(name=\"image-demo-dataset\")\ntask = dataset.create_data_rows([test_img_url])\ntask.wait_till_done()\nprint(\"Errors:\", task.errors)\nprint(\"Failed data rows:\", task.failed_data_rows)\n\n# Create ontology\nontology_builder = lb.OntologyBuilder(\n classifications=[ # List of Classification objects\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_question\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_question\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"free_text\"),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"nested_radio_question\",\n options=[\n lb.Option(\n \"first_radio_answer\",\n options=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(\"first_sub_radio_answer\")],\n )\n ],\n )\n ],\n ),\n ],\n tools=[ # List of Tool objects\n lb.Tool(tool=lb.Tool.Type.BBOX, name=\"bounding_box\"),\n lb.Tool(\n tool=lb.Tool.Type.BBOX,\n name=\"bbox_with_radio_subclass\",\n classifications=[\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"sub_radio_question\",\n options=[lb.Option(value=\"tool_first_sub_radio_answer\")],\n ),\n ],\n ),\n ],\n)\n\nontology = client.create_ontology(\n \"Image CSV Demo Ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Image,\n)\n\n# Set up project and connect ontology\nproject = client.create_project(name=\"Image Annotation Import Demo\",\n media_type=lb.MediaType.Image)\nproject.setup_editor(ontology)\n\n# Send data row towards our project\nbatch = project.create_batch(\n \"image-demo-batch\",\n global_keys=[\n global_key\n ], # paginated collection of data row objects, list of data row ids or global keys\n priority=1,\n)\n\nprint(f\"Batch: {batch}\")\n\n# Create a label and imported it towards our project\nradio_annotation = lb_types.ClassificationAnnotation(\n name=\"radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"second_radio_answer\")),\n)\nchecklist_annotation = lb_types.ClassificationAnnotation(\n name=\"checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(name=\"first_checklist_answer\"),\n lb_types.ClassificationAnswer(name=\"second_checklist_answer\"),\n ]),\n)\ntext_annotation = lb_types.ClassificationAnnotation(\n name=\"free_text\",\n value=lb_types.Text(answer=\"sample text\"),\n)\nnested_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\nbbox_annotation = lb_types.ObjectAnnotation(\n name=\"bounding_box\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=1690, y=977),\n 
end=lb_types.Point(x=1915, y=1307),\n ),\n)\nbbox_with_radio_subclass_annotation = lb_types.ObjectAnnotation(\n name=\"bbox_with_radio_subclass\",\n value=lb_types.Rectangle(\n start=lb_types.Point(x=541, y=933), # x = left, y = top\n end=lb_types.Point(x=871, y=1124), # x= left + width , y = top + height\n ),\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"tool_first_sub_radio_answer\")),\n )\n ],\n)\n\nlabel = []\nannotations = [\n radio_annotation,\n nested_radio_annotation,\n checklist_annotation,\n text_annotation,\n bbox_annotation,\n bbox_with_radio_subclass_annotation,\n]\n\nlabel.append(\n lb_types.Label(data={\"global_key\": global_key}, annotations=annotations))\n\nupload_job = lb.LabelImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=\"label_import_job\" + str(uuid.uuid4()),\n labels=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Select project"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "# PROJECT_ID = None\n# project = client.get_project(PROJECT_ID)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## CSV format overview\n",
+ "\n",
+ "In order to convert our Labelbox JSON data to a format more CSV friendly, we must first define the needed structure of our JSON. A common format that is versatile for both the built-in Python CSV writer and Pandas is as follows: \n",
+ "\n",
+ "```python\n",
+ "[\n",
+ " {\"\":\"\":\"\":\"\":\" None:\n \"\"\"Finds classification features inside an ontology recursively and returns them in a list\"\"\"\n for classification in classifications:\n if \"name\" in classification:\n class_list.append({\n \"feature_schema_id\": classification[\"featureSchemaId\"],\n \"column_name\": classification[\"instructions\"],\n })\n if \"options\" in classification:\n get_classification_features(classification[\"options\"], class_list)\n return class_list\n\n\ndef get_tool_features(tools: list) -> None:\n \"\"\"Creates list of tool names from ontology\"\"\"\n tool_list = []\n for tool in tools:\n tool_list.append({\n \"feature_schema_id\": tool[\"featureSchemaId\"],\n \"column_name\": tool[\"name\"],\n })\n if \"classifications\" in tool:\n tool_list = get_classification_features(tool[\"classifications\"],\n tool_list)\n return tool_list",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": "# Get ontology from project and normalized towards python dictionary\nontology = project.ontology().normalized\n\nclass_annotation_columns = get_classification_features(\n ontology[\"classifications\"])\ntool_annotation_columns = get_tool_features(ontology[\"tools\"])",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 3: Define our functions and strategy used to parse through our data\n",
+ "\n",
+ "Now that we have our columns defined, we need to come up with a strategy for navigating our export data. Review this [sample export](https://docs.labelbox.com/reference/export-image-annotations#sample-project-export) to follow along. While creating our columns, it is always best to first check if a key exists in your data row before populating a column. This is especially important for optional fields. In this demo, we will populate the value `None` for anything not present, which will result in a blank cell our CSV.\n"
+ ],
+ "cell_type": "markdown"
+ },
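+ {
+ "metadata": {},
+ "source": [
+ "As a quick illustration of this guard pattern (the `sample_data_row` dictionary below is made up and not part of a real export), Python's `dict.get` gives the same populate-`None`-when-missing behavior in a single call:"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "# Illustrative only: a hypothetical data row that has an ID but no \"global_key\"\nsample_data_row = {\"id\": \"abc123\"}\n\n# dict.get returns None when the key is absent, which becomes a blank cell in the CSV\nprint(sample_data_row.get(\"global_key\"))",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },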
+ {
+ "metadata": {},
+ "source": [
+ "### Data row detail base columns\n",
+ "The data row details can be accessed within a depth of one or two keys. Below is a function we will use to access the columns we defined. The parameters are the data row itself, the dictionary row that will be used to make our list, and our base columns list."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "def get_base_data_row_columns(data_row: dict[str:str], csv_row: dict[str:str],\n base_columns: list[str]) -> dict[str:str]:\n for base_column in base_columns:\n if base_column == \"Data Row ID\":\n csv_row[base_column] = data_row[\"data_row\"][\"id\"]\n\n elif base_column == \"Global Key\":\n if (\"global_key\"\n in data_row[\"data_row\"]): # Check if global key exists\n csv_row[base_column] = data_row[\"data_row\"][\"global_key\"]\n else:\n csv_row[base_column] = (\n None # If global key does not exist on data row set cell to None. This will create a blank cell on your csv\n )\n\n elif base_column == \"External ID\":\n if (\"external_id\"\n in data_row[\"data_row\"]): # Check if external_id exists\n csv_row[base_column] = data_row[\"data_row\"][\"external_id\"]\n else:\n csv_row[base_column] = (\n None # If external id does not exist on data row set cell to None. This will create a blank cell on your csv\n )\n\n elif base_column == \"Project ID\":\n csv_row[base_column] = project.uid\n return csv_row",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Label detail base columns\n",
+ "The label details are similar to data row details but exist at our export's label level. Later in the guide we will demonstrate how to get our exported data row at this level. The function below shows the process of obtaining the details we defined above. The parameters are the label, the dictionary row that we will be modifying, and the label detail column list we created."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "def get_base_label_columns(label: dict[str:str], csv_row: dict[str:str],\n label_base_columns: list[str]) -> dict[str:str]:\n for label_base_column in label_base_columns:\n if label_base_column == \"Label ID\":\n csv_row[label_base_column] = label[\"id\"]\n\n elif label_base_columns == \"Created By\":\n if (\n \"label_details\" in label\n ): # Check if label details is present. This field can be omitted in export.\n csv_row[label_base_column] = label_base_columns[\n \"label_details\"][\"created_by\"]\n else:\n csv_row[label_base_column] = None\n\n elif label_base_column == \"Skipped\":\n if (\n \"performance_details\" in label\n ): # Check if performance details are present. This field can be omitted in export.\n csv_row[label_base_column] = label[\"performance_details\"][\n \"skipped\"]\n else:\n csv_row[label_base_column] = None\n\n return csv_row",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Label annotation columns\n",
+ "The label annotations are the final columns we will need to obtain. The approach to obtaining these fields is more challenging than the approach we made for our detail columns. Suppose we attempt to obtain the fields with conditional statements and hard-defined paths. In that case, we will run into issues as each label can have annotations in different orders, at different depths, or not present at all. This will quickly create a mess, especially when we want our methods to work for more than one ontology. The best and cleanest way of obtaining these annotations inside our export data is through a recursive function.\n",
+ "\n",
+ "#### Recursion\n",
+ "A recursive function can be defined as a routine that calls itself directly or indirectly. They solve problems by solving smaller instances of the same problem. This technique is commonly used in programming to solve problems that can be broken down into simpler, similar subproblems. Our sub-problem, in this case, is obtaining each individual annotation. A recursive function is divided into two components:\n",
+ "\n",
+ "- **Base case:** This is a termination condition that prevents the function from calling itself indefinitely.\n",
+ "\n",
+ "- **Recursive case:** The function calls itself with the modified arguments in the recursive case. The recursive case should move closer to the base case with each iteration.\n",
+ "\n",
+ "For our example, our base case will be either the annotation exists on the label (return the value/answer), or it does not (return `None`). Our recursive case would be finding more classifications to parse.\n",
+ "\n",
+ "In the below code block, I will highlight a few important details inside our function. Essentially, we will be navigating through our JSON file by moving one classification key at a time until we find our annotation or, if everything has been searched, returning `None`, which will populate a blank cell on our CSV table. \n",
+ "\n",
+ "#### Tools\n",
+ "Tools are not nested but they can have nested classifications we will use or `get_feature_answers` function below to find the nested classification. Since tools are at the base level of a label and each tool has a different value key name, we will only be searching for bounding boxes for this tutorial. If you want to include other tools, reference our export guide for your data type and find the appropriate key to add on."
+ ],
+ "cell_type": "markdown"
+ },
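+ {
+ "metadata": {},
+ "source": [
+ "Before applying this to Labelbox data, here is a minimal sketch of the same recursive pattern on a made-up nested dictionary. The `find_key` helper and sample data are purely illustrative and not part of the Labelbox SDK:"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "def find_key(target: str, node: dict):\n    \"\"\"Recursively searches a nested dictionary for a key and returns its value\"\"\"\n    if target in node:  # Base case: the key exists at this level\n        return node[target]\n    for value in node.values():  # Recursive case: descend into nested dictionaries\n        if isinstance(value, dict):\n            found = find_key(target, value)\n            if found is not None:\n                return found\n    return None  # Base case: nothing left to search\n\n\n# The key is found three levels deep, mirroring how nested classifications are reached\nprint(find_key(\"answer\", {\"a\": {\"b\": {\"answer\": 42}}}))",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },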
+ {
+ "metadata": {},
+ "source": "def get_feature_answers(feature: str,\n annotations: list[dict[str:str]]) -> None | str:\n \"\"\"Returns answer of feature provided by navigating through a label's annotation list. Will return None if answer is not found.\n\n Args:\n feature (str): feature we are searching\n classifications (list[dict[str:str]]): annotation list that we will be searching for our feature with.\n\n Returns:\n None | str: The answer/value of the feature returns None if nothing is found\n \"\"\"\n for annotation in annotations:\n print(annotation)\n if (annotation[\"feature_schema_id\"] == feature[\"feature_schema_id\"]\n ): # Base conditions (found feature)\n if \"text_answer\" in annotation:\n return annotation[\"text_answer\"][\"content\"]\n if \"radio_answer\" in annotation:\n return annotation[\"radio_answer\"][\"value\"]\n if \"checklist_answers\" in annotation:\n # Since classifications can have more then one answer. This is set up to combine all classifications separated by a comma. Feel free to modify.\n return \", \".join([\n check_list_ans[\"value\"]\n for check_list_ans in annotation[\"checklist_answers\"]\n ])\n if \"bounding_box\" in annotation:\n return annotation[\"bounding_box\"]\n # Add more tools here with similar pattern as above\n\n # Recursion cases (found more classifications to search through)\n if \"radio_answer\" in annotation:\n if len(annotation[\"radio_answer\"][\"classifications\"]) > 0:\n value = get_feature_answers(\n feature, annotation[\"radio_answer\"][\"classifications\"]\n ) # Call function again return value if answer found\n if value:\n return value\n if \"checklist_answers\" in annotation:\n for checklist_ans in annotation[\"checklist_answers\"]:\n if len(checklist_ans[\"classifications\"]) > 0:\n value = get_feature_answers(\n feature, checklist_ans[\"classifications\"])\n if value:\n return value\n if (\"classifications\"\n in annotation): # case for if tool has classifications\n if len(annotation[\"classifications\"]) > 0:\n value = get_feature_answers(feature,\n annotation[\"classifications\"])\n if value:\n return value\n\n return None # Base case if searched through classifications and nothing was found (end of JSON). This can be omitted but included to visualize",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 4: Setting up our main data row handler function\n",
+ "Before we can start exporting, we need to set up our main data row handler. This function will be fed straight into our export. This function will put everything together and connect all the pieces. We will also be defining our global dictionary list that will be used to create our CSVs. The output parameter represents each data row."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "GLOBAL_CSV_LIST = []\n\n\ndef main(output: lb.BufferedJsonConverterOutput):\n\n # Navigate to our label list\n labels = output.json[\"projects\"][project.uid][\"labels\"]\n for label in labels:\n # Define our CSV \"row\"\n csv_row = dict()\n\n # Start with data row base columns\n csv_row = get_base_data_row_columns(output.json, csv_row,\n data_row_base_columns)\n\n # Add our label details\n csv_row = get_base_label_columns(label, csv_row, label_base_columns)\n\n # Add classification features\n for classification in class_annotation_columns:\n csv_row[classification[\"column_name\"]] = get_feature_answers(\n classification, label[\"annotations\"][\"classifications\"])\n\n # Add tools features\n for tool in tool_annotation_columns:\n csv_row[tool[\"column_name\"]] = get_feature_answers(\n tool, label[\"annotations\"][\"objects\"])\n\n # Append to global csv list\n GLOBAL_CSV_LIST.append(csv_row)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 5: Export our data\n",
+ "Now that we have defined functions and strategies, we are ready to export. Below, we are exporting directly from our project and feeding in the main function we created above."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "# Params required to obtain all fields we need\nparams = {\"performance_details\": True, \"label_details\": True}\n\nexport_task = project.export(params=params)\nexport_task.wait_till_done()\n\n# Conditional for if export task has errors\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nif export_task.has_result():\n export_json = export_task.get_buffered_stream(\n stream_type=lb.StreamType.RESULT\n ).start(\n stream_handler=main # Feeding our data row handler directly into export\n )",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "If everything went through correctly, you should see your `GLOBAL_CSV_LIST` printed out below with all your \"rows\" filled out."
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "pprint(GLOBAL_CSV_LIST)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": [
+ "## Step 6: Convert to our desired format\n",
+ "\n",
+ "The hard part is now completed!\ud83d\ude80 Now that you have your export in a flattened format, you can easily convert to a CSV or a Pandas DataFrame!"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "### Option A: CSV writer"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "with open(\"file.csv\", \"w\", newline=\"\") as csvfile:\n # Columns\n fieldnames = (data_row_base_columns + label_base_columns +\n [name[\"column_name\"] for name in class_annotation_columns] +\n [name[\"column_name\"] for name in tool_annotation_columns])\n writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n\n writer.writeheader()\n\n for row in GLOBAL_CSV_LIST:\n writer.writerow(row)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
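+ {
+ "metadata": {},
+ "source": [
+ "To sanity-check the result, we can read the file back with `csv.DictReader` (this assumes the `file.csv` written by the previous cell):"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "# Read the CSV back to verify the rows were written correctly\nwith open(\"file.csv\", newline=\"\") as csvfile:\n    for row in csv.DictReader(csvfile):\n        pprint(row)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },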
+ {
+ "metadata": {},
+ "source": [
+ "### Option B: Pandas DataFrame"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "columns = (data_row_base_columns + label_base_columns +\n [name[\"column_name\"] for name in class_annotation_columns] +\n [name[\"column_name\"] for name in tool_annotation_columns])\npd.DataFrame(GLOBAL_CSV_LIST, columns=columns)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ }
+ ]
+}
\ No newline at end of file
diff --git a/examples/scripts/generate_readme.py b/examples/scripts/generate_readme.py
index 80939acfc..135f9421d 100644
--- a/examples/scripts/generate_readme.py
+++ b/examples/scripts/generate_readme.py
@@ -69,7 +69,7 @@ def create_title(link: str) -> str:
# List to lower case certain words and list to keep certain acronyms capitalized
lower_case_words = ["to"]
- acronyms = ["html", "pdf", "llm", "dicom", "sam"]
+ acronyms = ["html", "pdf", "llm", "dicom", "sam", "csv"]
for word in split_link:
if word.lower() in acronyms: