firebase · andrewheard · Apr 22, 2025 · Apr 22, 2025
diff --git a/FirebaseVertexAI/Tests/Unit/Resources/gemini-report.pdf b/FirebaseVertexAI/Tests/Unit/Resources/gemini-report.pdf
diff --git a/FirebaseVertexAI/Tests/Unit/Resources/hello-world.mp3 b/FirebaseVertexAI/Tests/Unit/Resources/hello-world.mp3
diff --git a/FirebaseVertexAI/Tests/Unit/Snippets/MultimodalSnippets.swift b/FirebaseVertexAI/Tests/Unit/Snippets/MultimodalSnippets.swift
@@ -26,14 +26,28 @@ import XCTest
 @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
 final class MultimodalSnippets: XCTestCase {
   let bundle = BundleTestUtil.bundle()
-  lazy var model = VertexAI.vertexAI().generativeModel(modelName: "gemini-1.5-flash")
+  lazy var model = VertexAI.vertexAI().generativeModel(modelName: "gemini-2.0-flash")
   lazy var videoURL = {
     guard let url = bundle.url(forResource: "animals", withExtension: "mp4") else {
       fatalError("Video file animals.mp4 not found in Resources.")
     }
     return url
   }()
 
+  /// Location of the bundled `hello-world.mp3` sample read by the audio snippets.
+  /// Traps with a descriptive message if `bundle` does not contain the resource.
+  lazy var audioURL = {
+    if let url = bundle.url(forResource: "hello-world", withExtension: "mp3") { return url }
+    fatalError("Audio file hello-world.mp3 not found in Resources.")
+  }()
+
+  /// Location of the bundled `gemini-report.pdf` sample read by the document snippets.
+  /// Traps with a descriptive message if `bundle` does not contain the resource.
+  lazy var pdfURL = {
+    if let url = bundle.url(forResource: "gemini-report", withExtension: "pdf") { return url }
+    fatalError("PDF file gemini-report.pdf not found in Resources.")
+  }()
+
   override func setUpWithError() throws {
     try FirebaseApp.configureDefaultAppForSnippets()
   }
@@ -42,6 +56,8 @@ final class MultimodalSnippets: XCTestCase {
     await FirebaseApp.deleteDefaultAppForSnippets()
   }
 
+  // MARK: - Image Input
+
   #if canImport(UIKit)
     func testMultimodalOneImageNonStreaming() async throws {
       guard let image = UIImage(systemName: "bicycle") else { fatalError() }
@@ -98,6 +114,8 @@ final class MultimodalSnippets: XCTestCase {
     }
   #endif // canImport(UIKit)
 
+  // MARK: - Video Input
+
   func testMultimodalVideoNonStreaming() async throws {
     // Provide the video as `Data` with the appropriate MIME type
     let video = try InlineDataPart(data: Data(contentsOf: videoURL), mimeType: "video/mp4")
@@ -125,4 +143,73 @@ final class MultimodalSnippets: XCTestCase {
       }
     }
   }
+
+  // MARK: - Audio Input
+
+  /// Snippet: asks the model to transcribe the bundled MP3 with one non-streaming request.
+  func testMultiModalAudioNonStreaming() async throws {
+    // Wrap the recording's bytes as inline data tagged with its MIME type
+    let audioPart = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")
+    // Text instruction that accompanies the audio in the request
+    let instruction = "Transcribe what's said in this audio recording."
+
+    // Send both parts together with `generateContent` and await the complete response
+    let result = try await model.generateContent(audioPart, instruction)
+
+    // `text` is optional, so fall back to a placeholder when the response has none
+    print(result.text ?? "No text in response.")
+  }
+
+  /// Snippet: asks the model to transcribe the bundled MP3, printing the reply as it streams.
+  func testMultiModalAudioStreaming() async throws {
+    // Wrap the recording's bytes as inline data tagged with its MIME type
+    let audioPart = try InlineDataPart(data: Data(contentsOf: audioURL), mimeType: "audio/mpeg")
+
+    // Text instruction that accompanies the audio in the request
+    let instruction = "Transcribe what's said in this audio recording."
+
+    // Open a streaming request carrying both the audio and the instruction
+    let stream = try model.generateContentStream(audioPart, instruction)
+
+    // Print each chunk's text as it arrives; chunks whose `text` is nil are skipped
+    for try await partial in stream {
+      guard let text = partial.text else { continue }
+      print(text)
+    }
+  }
+
+  // MARK: - Document Input
+
+  /// Snippet: asks the model to summarize the bundled PDF report and prints the reply
+  /// incrementally as chunks arrive from `generateContentStream`.
+  func testMultiModalPDFStreaming() async throws {
+    // Wrap the document's bytes as inline data tagged with its MIME type
+    let pdfPart = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")
+
+    // Text instruction that accompanies the PDF in the request
+    let instruction = "Summarize the important results in this report."
+
+    // Open a streaming request carrying both the PDF and the instruction
+    let stream = try model.generateContentStream(pdfPart, instruction)
+
+    // Print each chunk's text as it arrives; chunks whose `text` is nil are skipped
+    for try await partial in stream {
+      guard let text = partial.text else { continue }
+      print(text)
+    }
+  }
+
+  /// Snippet: asks the model to summarize the bundled PDF with one non-streaming request.
+  func testMultiModalPDFNonStreaming() async throws {
+    // Wrap the document's bytes as inline data tagged with its MIME type
+    let pdfPart = try InlineDataPart(data: Data(contentsOf: pdfURL), mimeType: "application/pdf")
+    // Text instruction that accompanies the PDF in the request
+    let instruction = "Summarize the important results in this report."
+
+    // Send both parts together with `generateContent` and await the complete response
+    let result = try await model.generateContent(pdfPart, instruction)
+
+    // `text` is optional, so fall back to a placeholder when the response has none
+    print(result.text ?? "No text in response.")
+  }
 }