diff --git a/annotation_import/prompt_response.ipynb b/annotation_import/prompt_response.ipynb index 935689a..4694c70 100644 --- a/annotation_import/prompt_response.ipynb +++ b/annotation_import/prompt_response.ipynb @@ -75,14 +75,14 @@ }, { "metadata": {}, - "source": "%pip install -q \"labelbox[data]\"", + "source": "%pip install -q --upgrade \"labelbox[data]\"", "cell_type": "code", "outputs": [], "execution_count": null }, { "metadata": {}, - "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport uuid", + "source": "import labelbox as lb\nimport labelbox.types as lb_types\nimport time\nimport uuid", "cell_type": "code", "outputs": [], "execution_count": null @@ -141,7 +141,7 @@ }, { "metadata": {}, - "source": "prompt_annotation = lb_types.PromptClassificationAnnotation(\n name=\"Follow the prompt and select answers\",\n value=lb_types.PromptText(answer=\"This is an example of a prompt\"),\n)\n\nprompt_annotation_ndjson = {\n \"name\": \"Follow the prompt and select answers\",\n \"answer\": \"This is an example of a prompt\",\n}", + "source": "prompt_annotation = lb_types.PromptClassificationAnnotation(\n name=\"prompt text\",\n value=lb_types.PromptText(answer=\"This is an example of a prompt\"),\n)\n\nprompt_annotation_ndjson = {\n \"name\": \"prompt text\",\n \"answer\": \"This is an example of a prompt\",\n}", "cell_type": "code", "outputs": [], "execution_count": null @@ -190,7 +190,7 @@ }, { "metadata": {}, - "source": "response_text_annotation = lb_types.ClassificationAnnotation(\n name=\"Provide a reason for your choice\",\n value=lb_types.Text(answer=\"This is an example of a response text\"),\n)\n\nresponse_text_annotation_ndjson = {\n \"name\": \"Provide a reason for your choice\",\n \"answer\": \"This is an example of a response text\",\n}", + "source": "response_text_annotation = lb_types.ClassificationAnnotation(\n name=\"response text\",\n value=lb_types.Text(answer=\"This is an example of a response 
text\"),\n)\n\nresponse_text_annotation_ndjson = {\n \"name\": \"response text\",\n \"answer\": \"This is an example of a response text\",\n}", "cell_type": "code", "outputs": [], "execution_count": null @@ -204,7 +204,7 @@ }, { "metadata": {}, - "source": "nested_response_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_response_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_response_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_response_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_response_radio_annotation_ndjson = {\n \"name\":\n \"nested_radio_question\",\n \"answer\": [{\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n }],\n}\n\nnested_response_checklist_annotation_ndjson = {\n \"name\":\n \"nested_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", + "source": "nested_response_radio_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_response_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_radio_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n 
name=\"sub_radio_question\",\n value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n name=\"first_sub_radio_answer\")),\n )\n ],\n )),\n)\n\nnested_response_checklist_annotation = lb_types.ClassificationAnnotation(\n name=\"nested_response_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_checklist_answer\",\n classifications=[\n lb_types.ClassificationAnnotation(\n name=\"sub_checklist_question\",\n value=lb_types.Checklist(answer=[\n lb_types.ClassificationAnswer(\n name=\"first_sub_checklist_answer\")\n ]),\n )\n ],\n )\n ]),\n)\n\nnested_response_radio_annotation_ndjson = {\n \"name\":\n \"nested_response_radio_question\",\n \"answer\": [{\n \"name\":\n \"first_radio_answer\",\n \"classifications\": [{\n \"name\": \"sub_radio_question\",\n \"answer\": {\n \"name\": \"first_sub_radio_answer\"\n },\n }],\n }],\n}\n\nnested_response_checklist_annotation_ndjson = {\n \"name\":\n \"nested_response_checklist_question\",\n \"answer\": [{\n \"name\":\n \"first_checklist_answer\",\n \"classifications\": [{\n \"name\": \"sub_checklist_question\",\n \"answer\": {\n \"name\": \"first_sub_checklist_answer\"\n },\n }],\n }],\n}", "cell_type": "code", "outputs": [], "execution_count": null @@ -225,13 +225,13 @@ "source": [ "### Prompt response and prompt creation\n", "\n", - "A prompts and responses creation project automatically generates empty data rows upon creation. You will then need to obtain either the `global_keys` or `data_row_ids` attached to the generated data rows by exporting them from the created project." + "A prompts and responses creation project automatically generates empty data rows upon creation." 
], "cell_type": "markdown" }, { "metadata": {}, - "source": "prompt_response_project = client.create_model_evaluation_project(\n name=\"Demo prompt response project\",\n media_type=lb.MediaType.LLMPromptResponseCreation,\n dataset_name=\"Demo prompt response dataset\",\n data_row_count=1,\n)\n\nexport_task = prompt_response_project.export()\nexport_task.wait_till_done()\n\n# Check export for any errors\nif export_task.has_errors():\n export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n stream_handler=lambda error: print(error))\n\nstream = export_task.get_buffered_stream()\n\n# Obtain global keys to be used later on\nglobal_keys = [dr.json[\"data_row\"][\"global_key\"] for dr in stream]", + "source": "prompt_response_project = client.create_prompt_response_generation_project(\n name=\"Demo prompt response project\",\n media_type=lb.MediaType.LLMPromptResponseCreation,\n dataset_name=\"Demo prompt response dataset\",\n data_row_count=1,\n)", "cell_type": "code", "outputs": [], "execution_count": null @@ -251,7 +251,7 @@ }, { "metadata": {}, - "source": "ontology_builder = lb.OntologyBuilder(\n tools=[],\n classifications=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.PROMPT,\n name=\"prompt text\",\n character_min=1, # Minimum character count of prompt field (optional)\n character_max=\n 20, # Maximum character count of prompt field (optional)\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n name=\"response checklist feature\",\n options=[\n lb.ResponseOption(value=\"option_1\", label=\"option_1\"),\n lb.ResponseOption(value=\"option_2\", label=\"option_2\"),\n ],\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n name=\"response radio feature\",\n options=[\n lb.ResponseOption(value=\"first_radio_answer\"),\n lb.ResponseOption(value=\"second_radio_answer\"),\n ],\n ),\n 
lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_TEXT,\n name=\"response text\",\n character_min=\n 1, # Minimum character count of response text field (optional)\n character_max=\n 20, # Maximum character count of response text field (optional)\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n name=\"nested_response_radio_question\",\n options=[\n lb.ResponseOption(\n \"first_radio_answer\",\n options=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.\n RESPONSE_RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.ResponseOption(\"first_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n name=\"nested_response_checklist_question\",\n options=[\n lb.ResponseOption(\n \"first_checklist_answer\",\n options=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.\n RESPONSE_CHECKLIST,\n name=\"sub_checklist_question\",\n options=[\n lb.ResponseOption(\"first_sub_checklist_answer\")\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\n# Create ontology\nontology = client.create_ontology(\n \"Prompt and response ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.LLMPromptResponseCreation,\n)", + "source": "ontology_builder = lb.OntologyBuilder(\n tools=[],\n classifications=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.PROMPT,\n name=\"prompt text\",\n character_min=1, # Minimum character count of prompt field (optional)\n character_max=\n 50, # Maximum character count of prompt field (optional)\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n name=\"response checklist feature\",\n options=[\n lb.ResponseOption(value=\"option_1\", label=\"option_1\"),\n lb.ResponseOption(value=\"option_2\", 
label=\"option_2\"),\n ],\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n name=\"response radio feature\",\n options=[\n lb.ResponseOption(value=\"first_radio_answer\"),\n lb.ResponseOption(value=\"second_radio_answer\"),\n ],\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_TEXT,\n name=\"response text\",\n character_min=\n 1, # Minimum character count of response text field (optional)\n character_max=\n 50, # Maximum character count of response text field (optional)\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_RADIO,\n name=\"nested_response_radio_question\",\n options=[\n lb.ResponseOption(\n \"first_radio_answer\",\n options=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.\n RESPONSE_RADIO,\n name=\"sub_radio_question\",\n options=[\n lb.ResponseOption(\"first_sub_radio_answer\")\n ],\n )\n ],\n )\n ],\n ),\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.RESPONSE_CHECKLIST,\n name=\"nested_response_checklist_question\",\n options=[\n lb.ResponseOption(\n \"first_checklist_answer\",\n options=[\n lb.PromptResponseClassification(\n class_type=lb.PromptResponseClassification.Type.\n RESPONSE_CHECKLIST,\n name=\"sub_checklist_question\",\n options=[\n lb.ResponseOption(\"first_sub_checklist_answer\")\n ],\n )\n ],\n )\n ],\n ),\n ],\n)\n\n# Create ontology\nontology = client.create_ontology(\n \"Prompt and response ontology\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.LLMPromptResponseCreation,\n)\n\n# Attach ontology to project\nprompt_response_project.connect_ontology(ontology)", "cell_type": "code", "outputs": [], "execution_count": null @@ -259,7 +259,23 @@ { "metadata": {}, "source": [ - "## Step 3: Create the annotations payload\n", + "## Step 3: Export for `global_keys`\n", + "\n", + " You will then need to 
obtain either the `global_keys` or `data_row_ids` attached to the generated data rows by exporting them from the created project. Since the generation of data rows is an async process, you will need to wait for the project data rows to be completed before exporting." + ], + "cell_type": "markdown" + }, + { + "metadata": {}, + "source": "time.sleep(20)\n\nexport_task = prompt_response_project.export()\nexport_task.wait_till_done()\n\n# Check export for any errors\nif export_task.has_errors():\n    export_task.get_buffered_stream(stream_type=lb.StreamType.ERRORS).start(\n        stream_handler=lambda error: print(error))\n\nstream = export_task.get_buffered_stream()\n\n# Obtain global keys to be used later on\nglobal_keys = [dr.json[\"data_row\"][\"global_key\"] for dr in stream]", + "cell_type": "code", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "source": [ + "## Step 4: Create the annotations payload\n", "\n", "For prelabeled (model-assisted labeling) scenarios, pass your payload as the value of the `predictions` parameter. For ground truths, pass the payload to the `labels` parameter." ],