From 2c955d192eb8caae7687dbf67d55a0e064111995 Mon Sep 17 00:00:00 2001 From: David Huntsperger Date: Sun, 5 May 2024 20:53:18 -0700 Subject: [PATCH 1/4] add plain text to multimodal prompting guide --- .../docs/prompting_with_media.ipynb | 39 +++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/site/en/gemini-api/docs/prompting_with_media.ipynb b/site/en/gemini-api/docs/prompting_with_media.ipynb index 567082162..608a93e5c 100644 --- a/site/en/gemini-api/docs/prompting_with_media.ipynb +++ b/site/en/gemini-api/docs/prompting_with_media.ipynb @@ -65,10 +65,9 @@ }, "source": [ "The Gemini API supports prompting with text, image, and audio data, also known as *multimodal* prompting. You can include text, image,\n", - "and audio in your prompts. For small images, you can point the Gemini model\n", - "directly to a local file when providing a prompt. For larger images, videos\n", - "(sequences of image frames), and audio, upload the files with the [File\n", - "API](https://ai.google.dev/api/rest/v1beta/files) before including them in\n", + "and audio in your prompts. For small files, you can point the Gemini model\n", + "directly to a local file when providing a prompt. Upload larger files with the\n", + "[File API](https://ai.google.dev/api/rest/v1beta/files) before including them in\n", "prompts.\n", "\n", "The File API lets you store up to 20GB of files per project, with each file not\n", @@ -176,7 +175,7 @@ "source": [ "## Upload a file to the File API\n", "\n", - "The File API lets you upload a variety of multimodal MIME types, including images and audio formats. The File API handles inputs that can be used to generate content with [`model.generateContent`](https://ai.google.dev/api/rest/v1/models/generateContent) or [`model.streamGenerateContent`](https://ai.google.dev/api/rest/v1/models/streamGenerateContent).\n", + "The File API lets you upload a variety of multimodal MIME types, including plain text, images, and audio formats. 
The File API handles inputs that can be used to generate content with [`model.generateContent`](https://ai.google.dev/api/rest/v1/models/generateContent) or [`model.streamGenerateContent`](https://ai.google.dev/api/rest/v1/models/streamGenerateContent).\n", "\n", "The File API accepts files under 2GB in size and can store up to 20GB of files per project. Files last for 2 days and cannot be downloaded from the API." ] @@ -328,7 +327,7 @@ "source": [ "## Supported file formats\n", "\n", - "Gemini models support prompting with multiple file formats. This section explains considerations in using general media formats for prompting, specifically image, audio, and video files. You can use media files for prompting only with specific model versions, as shown in the following table.\n", + "Gemini models support prompting with multiple file formats. This section explains considerations in using general media formats for prompting, specifically image, audio, video, and plain text files. You can use media files for prompting only with specific model versions, as shown in the following table.\n", "\n", "\n", " \n", @@ -337,6 +336,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -345,12 +345,14 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", + " \n", " \n", " \n", "
ImagesAudioVideoPlain text
✔ (3600 max image files)
Gemini Pro Vision✔ (16 max image files)
\n", @@ -387,7 +389,30 @@ "\n", "You can use video data for prompting with the `gemini-1.5-pro` model. However, video file formats are not supported as direct inputs by the Gemini API. You can use video data as prompt input by breaking down the video into a series of still frame images and a separate audio file. This approach lets you manage the amount of data, and the level of detail provided by the video, by choosing how many frames per second are included in your prompt from the video file.\n", "\n", - "Note: Video files added to a prompt as constituent parts, audio file and image frames, are considered as separate prompt data inputs by the model. For this reason, requests or questions that specify the time when *both* an audio snippet and video frames appear in the source video may not produce useful results." + "Note: Video files added to a prompt as constituent parts, audio file and image frames, are considered as separate prompt data inputs by the model. For this reason, requests or questions that specify the time when *both* an audio snippet and video frames appear in the source video may not produce useful results.\n", + "\n", + "### Plain text formats\n", + "\n", + "You can use plain text files for prompting with the `gemini-1.5-pro` model. 
When you use plain text files for prompting, they are subject to the following limitations and requirements:\n", + "\n", + "- The File API supports uploading plain text files with the following MIME\n", + " types:\n", + " - text/plain\n", + " - text/html \n", + " - text/css\n", + " - text/javascript\n", + " - application/x-javascript\n", + " - text/x-typescript\n", + " - application/x-typescript\n", + " - text/csv\n", + " - text/markdown\n", + " - text/x-python\n", + " - application/x-python-code\n", + " - application/json\n", + " - text/xml\n", + " - application/rtf\n", + " - text/rtf\n", + " - video/text/timestamp" ] }, { From 94972d2731586fcd4aae63461495fca42de21546 Mon Sep 17 00:00:00 2001 From: David Huntsperger Date: Sun, 5 May 2024 21:14:02 -0700 Subject: [PATCH 2/4] remove plain text column from media table --- site/en/gemini-api/docs/prompting_with_media.ipynb | 3 --- 1 file changed, 3 deletions(-) diff --git a/site/en/gemini-api/docs/prompting_with_media.ipynb b/site/en/gemini-api/docs/prompting_with_media.ipynb index 608a93e5c..4309fd4aa 100644 --- a/site/en/gemini-api/docs/prompting_with_media.ipynb +++ b/site/en/gemini-api/docs/prompting_with_media.ipynb @@ -336,7 +336,6 @@ " Images\n", " Audio\n", " Video\n", - " Plain text\n", " \n", " \n", " \n", @@ -345,14 +344,12 @@ " ✔ (3600 max image files)\n", " ✔\n", " ✔\n", - " ✔\n", " \n", " \n", " Gemini Pro Vision\n", " ✔ (16 max image files)\n", " \n", " \n", - " \n", " \n", " \n", "\n", From 69e398289ae78e81a38003fb32c61c46c7f79641 Mon Sep 17 00:00:00 2001 From: David Huntsperger Date: Sun, 5 May 2024 21:17:47 -0700 Subject: [PATCH 3/4] remove model-specific info from plain text format documentation --- site/en/gemini-api/docs/prompting_with_media.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/en/gemini-api/docs/prompting_with_media.ipynb b/site/en/gemini-api/docs/prompting_with_media.ipynb index 4309fd4aa..353eb3f19 100644 --- 
a/site/en/gemini-api/docs/prompting_with_media.ipynb +++ b/site/en/gemini-api/docs/prompting_with_media.ipynb @@ -390,7 +390,7 @@ "\n", "### Plain text formats\n", "\n", - "You can use plain text files for prompting with the `gemini-1.5-pro` model. When you use plain text files for prompting, they are subject to the following limitations and requirements:\n", + "When you use plain text files for prompting, they are subject to the following limitations and requirements:\n", "\n", "- The File API supports uploading plain text files with the following MIME\n", " types:\n", From 2b45644c8909528e11c579cfc4670b06e3ea1fc9 Mon Sep 17 00:00:00 2001 From: David Huntsperger Date: Mon, 6 May 2024 06:26:11 -0700 Subject: [PATCH 4/4] clarifying File API plain text requirements --- .../docs/prompting_with_media.ipynb | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/site/en/gemini-api/docs/prompting_with_media.ipynb b/site/en/gemini-api/docs/prompting_with_media.ipynb index 353eb3f19..c8d53c223 100644 --- a/site/en/gemini-api/docs/prompting_with_media.ipynb +++ b/site/en/gemini-api/docs/prompting_with_media.ipynb @@ -390,26 +390,26 @@ "\n", "### Plain text formats\n", "\n", - "When you use plain text files for prompting, they are subject to the following limitations and requirements:\n", - "\n", - "- The File API supports uploading plain text files with the following MIME\n", - " types:\n", - " - text/plain\n", - " - text/html \n", - " - text/css\n", - " - text/javascript\n", - " - application/x-javascript\n", - " - text/x-typescript\n", - " - application/x-typescript\n", - " - text/csv\n", - " - text/markdown\n", - " - text/x-python\n", - " - application/x-python-code\n", - " - application/json\n", - " - text/xml\n", - " - application/rtf\n", - " - text/rtf\n", - " - video/text/timestamp" + "The File API supports uploading plain text files with the following MIME types:\n", + "- text/plain\n", + "- text/html \n", + "- text/css\n", + "- 
text/javascript\n", + "- application/x-javascript\n", + "- text/x-typescript\n", + "- application/x-typescript\n", + "- text/csv\n", + "- text/markdown\n", + "- text/x-python\n", + "- application/x-python-code\n", + "- application/json\n", + "- text/xml\n", + "- application/rtf\n", + "- text/rtf\n", + "- video/text/timestamp\n", + "\n", + "For plain text files with a MIME type that isn't on this list, try specifying\n", + "the MIME type manually when you upload the file." ] }, {