From 1243130974293eda728e8d27db59842dbda45c3a Mon Sep 17 00:00:00 2001 From: Mark McDonald Date: Mon, 13 May 2024 20:34:54 -0700 Subject: [PATCH] Updates to Prompting with Media notebook Applying changes OBO Anirudh. Co-authored-by: anirudh161 --- .../docs/prompting_with_media.ipynb | 89 ++++++++----------- 1 file changed, 38 insertions(+), 51 deletions(-) diff --git a/site/en/gemini-api/docs/prompting_with_media.ipynb b/site/en/gemini-api/docs/prompting_with_media.ipynb index cbe8786fe..b1f15fa25 100644 --- a/site/en/gemini-api/docs/prompting_with_media.ipynb +++ b/site/en/gemini-api/docs/prompting_with_media.ipynb @@ -209,15 +209,7 @@ "metadata": { "id": "N9NxXGZKKusG" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Uploaded file 'Sample drawing' as: https://generativelanguage.googleapis.com/v1beta/files/ui00j5zfuqe0\n" - ] - } - ], + "outputs": [], "source": [ "# Upload the file\n", "sample_file = genai.upload_file(path=\"image.jpg\",\n", @@ -245,9 +237,9 @@ "source": [ "### Get file\n", "\n", - "After uploading the file, you can verify the API has successfully received the files by calling `files.get`.\n", + "After uploading the file, verify that the File API has successfully received it by calling `files.get`.\n", "\n", - "It lets you get the file metadata that have been uploaded to the File API that are associated with the Cloud project your API key belongs to. Only the `name` (and by extension, the `uri`) are unique. Only use the `displayName` to identify files if you manage uniqueness yourself." + "`files.get` lets you get the metadata for files that have been uploaded to the File API and are associated with the Cloud project your API key belongs to. Only the `name` (and by extension, the `uri`) are unique. Only use the `displayName` to identify files if you manage uniqueness yourself."
] }, { @@ -327,9 +319,9 @@ "id": "MNvhBdoDFnTC" }, "source": [ - "## Upload video to the File API\n", + "### Upload a video file to the File API\n", "\n", - "The Gemini API accepts video file formats directly. This example uses the short film \"Big Buck Bunny\".\n", + "The File API accepts video file formats directly. This example uses the short film \"Big Buck Bunny\".\n", "\n", "> \"Big Buck Bunny\" is (c) copyright 2008, Blender Foundation / www.bigbuckbunny.org and [licensed](https://peach.blender.org/about/) under the [Creative Commons Attribution 3.0](http://creativecommons.org/licenses/by/3.0/) License.\n", "\n", @@ -358,17 +350,8 @@ "video_file_name = \"BigBuckBunny_320x180.mp4\"\n", "\n", "print(f\"Uploading file...\")\n", - "file_response = genai.upload_file(path=video_file_name)\n", - "print(f\"Completed upload: {file_response.uri}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "06GCLdmwNin5" - }, - "source": [ - "NOTE: The File API service currently samples the video at 1 FPS and may be subject to change to provide the best inference quality." + "video_file = genai.upload_file(path=video_file_name)\n", + "print(f\"Completed upload: {video_file.uri}\")" ] }, { @@ -377,11 +360,11 @@ "id": "oOZmTUb4FWOa" }, "source": [ - "## Get File\n", + "### Get file\n", "\n", - "After uploading the file, you can verify the API has successfully received the files by calling the `files.get` method.\n", + "Verify the API has successfully received the files by calling the `files.get` method.\n", "\n", - "The `files.get` method lets you see the file uploaded to the File API that are associated with the Cloud project your API key belongs to. Only the `name` (and by extension, the `uri`) are unique." + "NOTE: Video files have a `State` field in the File API. When a video is uploaded, it will be in `PROCESSING` state until it is ready for inference. Only `ACTIVE` files can be used for model inference." 
] }, { @@ -392,22 +375,15 @@ }, "outputs": [], "source": [ - "import requests\n", - "import googleapiclient\n", "import time\n", "\n", - "# Use Discovery until SDK are updated\n", - "GENAI_URL = f\"https://generativelanguage.googleapis.com/$discovery/rest?version=v1beta&key={GOOGLE_API_KEY}\"\n", - "discovery = requests.get(GENAI_URL)\n", - "service = googleapiclient.discovery.build_from_document(discovery.content, developerKey=GOOGLE_API_KEY)\n", - "resp = service.files().get(name=file_response.name).execute()\n", + "while video_file.state.name == \"PROCESSING\":\n", + " print('.', end='')\n", + " time.sleep(10)\n", + " video_file = genai.get_file(video_file.name)\n", "\n", - "while resp['state'] == \"PROCESSING\":\n", - " print(f\"File is in State {resp['state']}... Checking again in 5 seconds.\")\n", - " time.sleep(5)\n", - " resp = service.files().get(name=file_response.name).execute()\n", - "\n", - "print(f\"File is {resp['state']}. {file_response.uri}\")" + "if video_file.state.name == \"FAILED\":\n", + " raise ValueError(video_file.state.name)" ] }, { @@ -416,7 +392,7 @@ "id": "zS5NmQeXLqeS" }, "source": [ - "## Generate Content\n", + "### Generate content\n", "\n", "After the video has been uploaded, you can make `GenerateContent` requests that reference the File API URI." ] @@ -437,9 +413,7 @@ "\n", "# Make the LLM request.\n", "print(\"Making LLM inference request...\")\n", - "request = [{'role':'user', 'parts': [file_response]},\n", - " {'role':'user', 'parts': [prompt]}]\n", - "response = model.generate_content(request,\n", + "response = model.generate_content([video_file, prompt],\n", " request_options={\"timeout\": 600})\n", "print(response.text)" ] @@ -450,7 +424,7 @@ "id": "diCy9BgjLqeS" }, "source": [ - "## Delete File\n", + "### Delete files\n", "\n", "Files are automatically deleted after 2 days or you can manually delete them using `files.delete()`." 
] @@ -463,8 +437,8 @@ }, "outputs": [], "source": [ - "genai.delete_file(file_response.name)\n", - "print(f'Deleted file {file_response.uri}')" + "genai.delete_file(video_file.name)\n", + "print(f'Deleted file {video_file.uri}')" ] }, { @@ -537,9 +511,22 @@ "\n", "You can use video data for prompting with the `gemini-1.5-pro` model.\n", "\n", + "- Video data is supported in the following common video format [MIME types](https://developers.google.com/drive/api/guides/ref-export-formats):\n", + " - video/mp4\n", + " - video/mpeg\n", + " - video/mov\n", + " - video/avi\n", + " - video/x-flv\n", + " - video/mpg\n", + " - video/webm\n", + " - video/wmv\n", + " - video/3gpp\n", + "\n", + "- The File API service currently samples videos into images at 1 frame per second (FPS); this sampling rate may change to provide the best inference quality. Individual images take up 258 tokens **regardless** of resolution and quality.\n", + "\n", "### Plain text formats\n", "\n", - "The File API supports uploading plain text files with the following MIME types:\n", + "The File API supports uploading plain text files with the following [MIME types](https://developers.google.com/drive/api/guides/ref-export-formats):\n", "- text/plain\n", "- text/html\n", "- text/css\n", @@ -563,12 +550,12 @@ { "cell_type": "markdown", "metadata": { - "id": "uploading_files_to_colab" + "id": "rIoNRWn0nwUy" }, "source": [ "## Appendix: Uploading files to Colab\n", "\n", - "This notebook uses the File API with files that were downloaded from the internet. If you're running this in Colab and want to use your own files, you first need to upload them to the Colab instance.\n", + "This notebook uses the File API with files that were downloaded from the internet. 
If you're running this in Colab and want to use your own files, you first need to upload them to the Colab instance.\n", "\n", "First, click **Files** on the left sidebar, then click the **Upload** button:\n", "\n", @@ -581,7 +568,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "2QGBpboMmxHG" + "id": "VqAwyEa3nxaZ" }, "outputs": [], "source": [