diff --git a/FirebaseVertexAI/Tests/Unit/Resources/gemini-report.pdf b/FirebaseVertexAI/Tests/Unit/Resources/gemini-report.pdf
new file mode 100644
index 00000000000..2a2f00cb77b
Binary files /dev/null and b/FirebaseVertexAI/Tests/Unit/Resources/gemini-report.pdf differ
diff --git a/FirebaseVertexAI/Tests/Unit/Resources/hello-world.mp3 b/FirebaseVertexAI/Tests/Unit/Resources/hello-world.mp3
new file mode 100644
index 00000000000..be617e65a5a
Binary files /dev/null and b/FirebaseVertexAI/Tests/Unit/Resources/hello-world.mp3 differ
diff --git a/FirebaseVertexAI/Tests/Unit/Snippets/MultimodalSnippets.swift b/FirebaseVertexAI/Tests/Unit/Snippets/MultimodalSnippets.swift
index 421b9897def..9e4683d8c64 100644
--- a/FirebaseVertexAI/Tests/Unit/Snippets/MultimodalSnippets.swift
+++ b/FirebaseVertexAI/Tests/Unit/Snippets/MultimodalSnippets.swift
@@ -26,7 +26,7 @@ import XCTest
 @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
 final class MultimodalSnippets: XCTestCase {
   let bundle = BundleTestUtil.bundle()
-  lazy var model = VertexAI.vertexAI().generativeModel(modelName: "gemini-1.5-flash")
+  lazy var model = VertexAI.vertexAI().generativeModel(modelName: "gemini-2.0-flash")
   lazy var videoURL = {
     guard let url = bundle.url(forResource: "animals", withExtension: "mp4") else {
       fatalError("Video file animals.mp4 not found in Resources.")
@@ -34,6 +34,20 @@ final class MultimodalSnippets: XCTestCase {
     return url
   }()
 
+  lazy var audioURL = {
+    guard let url = bundle.url(forResource: "hello-world", withExtension: "mp3") else {
+      fatalError("Audio file hello-world.mp3 not found in Resources.")
+    }
+    return url
+  }()
+
+  lazy var pdfURL = {
+    guard let url = bundle.url(forResource: "gemini-report", withExtension: "pdf") else {
+      fatalError("PDF file gemini-report.pdf not found in Resources.")
+    }
+    return url
+  }()
+
   override func setUpWithError() throws {
     try FirebaseApp.configureDefaultAppForSnippets()
   }
@@ -42,6 +56,8 @@ final class MultimodalSnippets: XCTestCase {
     await FirebaseApp.deleteDefaultAppForSnippets()
   }
 
+  // MARK: - Image Input
+
   #if canImport(UIKit)
     func testMultimodalOneImageNonStreaming() async throws {
       guard let image = UIImage(systemName: "bicycle") else { fatalError() }
@@ -98,6 +114,8 @@ final class MultimodalSnippets: XCTestCase {
     }
   #endif // canImport(UIKit)
 
+  // MARK: - Video Input
+
   func testMultimodalVideoNonStreaming() async throws {
     // Provide the video as `Data` with the appropriate MIME type
     let video = try InlineDataPart(data: Data(contentsOf: videoURL), mimeType: "video/mp4")
@@ -125,4 +143,73 @@ final class MultimodalSnippets: XCTestCase {
       }
     }
   }
+
+  // MARK: - Audio Input
+
+  func testMultiModalAudioNonStreaming() async throws {
+    // Provide the audio as `Data` with the appropriate MIME type
+    let audio = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")
+
+    // Provide a text prompt to include with the audio
+    let prompt = "Transcribe what's said in this audio recording."
+
+    // To generate text output, call `generateContent` with the audio and text prompt
+    let response = try await model.generateContent(audio, prompt)
+
+    // Print the generated text, or a fallback message when the response has no text
+    print(response.text ?? "No text in response.")
+  }
+
+  func testMultiModalAudioStreaming() async throws {
+    // Provide the audio as `Data` with the appropriate MIME type
+    let audio = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")
+
+    // Provide a text prompt to include with the audio
+    let prompt = "Transcribe what's said in this audio recording."
+
+    // To stream generated text output, call `generateContentStream` with the audio and text prompt
+    let contentStream = try model.generateContentStream(audio, prompt)
+
+    // Print the text of each streamed chunk, skipping chunks that have no text
+    for try await chunk in contentStream {
+      if let text = chunk.text {
+        print(text)
+      }
+    }
+  }
+
+  // MARK: - Document Input
+
+  func testMultiModalPDFStreaming() async throws {
+    // Provide the PDF as `Data` with the appropriate MIME type
+    let pdf = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")
+
+    // Provide a text prompt to include with the PDF file
+    let prompt = "Summarize the important results in this report."
+
+    // To stream generated text output, call `generateContentStream` with the PDF file and
+    // text prompt
+    let contentStream = try model.generateContentStream(pdf, prompt)
+
+    // Print the text of each streamed chunk, skipping chunks that have no text
+    for try await chunk in contentStream {
+      if let text = chunk.text {
+        print(text)
+      }
+    }
+  }
+
+  func testMultiModalPDFNonStreaming() async throws {
+    // Provide the PDF as `Data` with the appropriate MIME type
+    let pdf = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")
+
+    // Provide a text prompt to include with the PDF file
+    let prompt = "Summarize the important results in this report."
+
+    // To generate text output, call `generateContent` with the PDF file and text prompt
+    let response = try await model.generateContent(pdf, prompt)
+
+    // Print the generated text, or a fallback message when the response has no text
+    print(response.text ?? "No text in response.")
+  }
 }