Skip to content

Commit 6b48538

Browse files
authored
[Vertex AI] Add snippets for multi-modal audio and PDF inputs (#14750)
1 parent feec4a0 commit 6b48538

File tree

3 files changed

+88
-1
lines changed

3 files changed

+88
-1
lines changed
Binary file not shown.
Binary file not shown.

FirebaseVertexAI/Tests/Unit/Snippets/MultimodalSnippets.swift

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,28 @@ import XCTest
2626
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
2727
final class MultimodalSnippets: XCTestCase {
2828
let bundle = BundleTestUtil.bundle()
29-
lazy var model = VertexAI.vertexAI().generativeModel(modelName: "gemini-1.5-flash")
29+
lazy var model = VertexAI.vertexAI().generativeModel(modelName: "gemini-2.0-flash")
3030
/// File URL of the `animals.mp4` resource, looked up in `bundle` on first access.
///
/// A missing resource is treated as a test-setup error: the lookup traps with
/// `fatalError` instead of returning an optional.
lazy var videoURL = {
3131
guard let url = bundle.url(forResource: "animals", withExtension: "mp4") else {
3232
fatalError("Video file animals.mp4 not found in Resources.")
3333
}
3434
return url
3535
}()
3636

37+
/// File URL of the `hello-world.mp3` resource, looked up in `bundle` on first access.
///
/// A missing resource is treated as a test-setup error: the lookup traps with
/// `fatalError` instead of returning an optional.
lazy var audioURL = {
38+
guard let url = bundle.url(forResource: "hello-world", withExtension: "mp3") else {
39+
fatalError("Audio file hello-world.mp3 not found in Resources.")
40+
}
41+
return url
42+
}()
43+
44+
/// File URL of the `gemini-report.pdf` resource, looked up in `bundle` on first access.
///
/// A missing resource is treated as a test-setup error: the lookup traps with
/// `fatalError` instead of returning an optional.
lazy var pdfURL = {
45+
guard let url = bundle.url(forResource: "gemini-report", withExtension: "pdf") else {
46+
fatalError("PDF file gemini-report.pdf not found in Resources.")
47+
}
48+
return url
49+
}()
50+
3751
/// XCTest per-test setup hook: calls `FirebaseApp.configureDefaultAppForSnippets()`.
///
/// - Throws: Whatever `configureDefaultAppForSnippets()` throws; the error is not handled here.
override func setUpWithError() throws {
3852
try FirebaseApp.configureDefaultAppForSnippets()
3953
}
@@ -42,6 +56,8 @@ final class MultimodalSnippets: XCTestCase {
4256
await FirebaseApp.deleteDefaultAppForSnippets()
4357
}
4458

59+
// MARK: - Image Input
60+
4561
#if canImport(UIKit)
4662
func testMultimodalOneImageNonStreaming() async throws {
4763
guard let image = UIImage(systemName: "bicycle") else { fatalError() }
@@ -98,6 +114,8 @@ final class MultimodalSnippets: XCTestCase {
98114
}
99115
#endif // canImport(UIKit)
100116

117+
// MARK: - Video Input
118+
101119
func testMultimodalVideoNonStreaming() async throws {
102120
// Provide the video as `Data` with the appropriate MIME type
103121
let video = try InlineDataPart(data: Data(contentsOf: videoURL), mimeType: "video/mp4")
@@ -125,4 +143,73 @@ final class MultimodalSnippets: XCTestCase {
125143
}
126144
}
127145
}
146+
147+
// MARK: - Audio Input
148+
149+
/// Snippet: sends the MP3 at `audioURL` together with a text prompt to `model` in a
/// single `generateContent` call and prints the response text.
///
/// Makes no assertions; the test fails only if one of the `try` expressions throws
/// (reading the file, building the part, or the request itself).
///
/// NOTE(review): the image and video tests in this class are named `testMultimodal…`
/// (lowercase "m"), while this one uses `testMultiModal…`; consider aligning the names.
func testMultiModalAudioNonStreaming() async throws {
150+
// Provide the audio as `Data` with the appropriate MIME type
151+
let audio = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")
152+
153+
// Provide a text prompt to include with the audio
154+
let prompt = "Transcribe what's said in this audio recording."
155+
156+
// To generate text output, call `generateContent` with the audio and text prompt
157+
let response = try await model.generateContent(audio, prompt)
158+
159+
// Print the generated text, handling the case where it might be nil
160+
print(response.text ?? "No text in response.")
161+
}
162+
163+
/// Snippet: sends the MP3 at `audioURL` together with a text prompt to `model` via
/// `generateContentStream` and prints the text of each chunk as it arrives.
///
/// Makes no assertions; the test fails only if one of the `try` expressions throws
/// (reading the file, building the part, creating the stream, or iterating it).
///
/// NOTE(review): the image and video tests in this class are named `testMultimodal…`
/// (lowercase "m"), while this one uses `testMultiModal…`; consider aligning the names.
func testMultiModalAudioStreaming() async throws {
164+
// Provide the audio as `Data` with the appropriate MIME type
165+
let audio = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")
166+
167+
// Provide a text prompt to include with the audio
168+
let prompt = "Transcribe what's said in this audio recording."
169+
170+
// To stream generated text output, call `generateContentStream` with the audio and text prompt
171+
let contentStream = try model.generateContentStream(audio, prompt)
172+
173+
// Print the text of each streamed chunk, skipping chunks whose `text` is nil
174+
for try await chunk in contentStream {
175+
if let text = chunk.text {
176+
print(text)
177+
}
178+
}
179+
}
180+
181+
// MARK: - Document Input
182+
183+
/// Snippet: sends the PDF at `pdfURL` together with a text prompt to `model` via
/// `generateContentStream` and prints the text of each chunk as it arrives.
///
/// Makes no assertions; the test fails only if one of the `try` expressions throws
/// (reading the file, building the part, creating the stream, or iterating it).
///
/// NOTE(review): the image and video tests in this class are named `testMultimodal…`
/// (lowercase "m"), while this one uses `testMultiModal…`; consider aligning the names.
func testMultiModalPDFStreaming() async throws {
184+
// Provide the PDF as `Data` with the appropriate MIME type
185+
let pdf = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")
186+
187+
// Provide a text prompt to include with the PDF file
188+
let prompt = "Summarize the important results in this report."
189+
190+
// To stream generated text output, call `generateContentStream` with the PDF file and text
191+
// prompt
192+
let contentStream = try model.generateContentStream(pdf, prompt)
193+
194+
// Print the text of each streamed chunk, skipping chunks whose `text` is nil
195+
for try await chunk in contentStream {
196+
if let text = chunk.text {
197+
print(text)
198+
}
199+
}
200+
}
201+
202+
/// Snippet: sends the PDF at `pdfURL` together with a text prompt to `model` in a
/// single `generateContent` call and prints the response text.
///
/// Makes no assertions; the test fails only if one of the `try` expressions throws
/// (reading the file, building the part, or the request itself).
///
/// NOTE(review): the image and video tests in this class are named `testMultimodal…`
/// (lowercase "m"), while this one uses `testMultiModal…`; consider aligning the names.
func testMultiModalPDFNonStreaming() async throws {
203+
// Provide the PDF as `Data` with the appropriate MIME type
204+
let pdf = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")
205+
206+
// Provide a text prompt to include with the PDF file
207+
let prompt = "Summarize the important results in this report."
208+
209+
// To generate text output, call `generateContent` with the PDF file and text prompt
210+
let response = try await model.generateContent(pdf, prompt)
211+
212+
// Print the generated text, handling the case where it might be nil
213+
print(response.text ?? "No text in response.")
214+
}
128215
}

0 commit comments

Comments
 (0)