11import base64
2+ import time
23from os import getenv
3- from typing import Optional
4+ from typing import Any , Optional
45from uuid import uuid4
56
67from agno .agent import Agent
7- from agno .media import ImageArtifact
8+ from agno .media import ImageArtifact , VideoArtifact
89from agno .tools import Toolkit
9- from agno .utils .log import log_debug , log_error
10+ from agno .utils .log import log_debug , log_error , log_info
1011
1112try :
1213 from google .genai import Client
14+ from google .genai .types import GenerateImagesResponse , GenerateVideosOperation
1315except (ModuleNotFoundError , ImportError ):
1416 raise ImportError ("`google-genai` not installed. Please install using `pip install google-genai`" )
1517
1618
1719class GeminiTools (Toolkit ):
18- """Tools for interacting with Google Gemini API (including Imagen for images) """
20+ """Tools for interacting with Google Gemini API"""
1921
def __init__(
    self,
    api_key: Optional[str] = None,
    vertexai: bool = False,
    project_id: Optional[str] = None,
    location: Optional[str] = None,
    image_generation_model: str = "imagen-3.0-generate-002",
    video_generation_model: str = "veo-2.0-generate-001",
    **kwargs,
):
    """Create the toolkit and its underlying Google GenAI client.

    Args:
        api_key: Gemini API key. Falls back to the `GOOGLE_API_KEY` environment
            variable. Only required when Vertex AI mode is off.
        vertexai: Use the Vertex AI backend instead of the Gemini API. Also
            enabled when `GOOGLE_GENAI_USE_VERTEXAI` is set to `true` or `1`.
        project_id: Google Cloud project for Vertex AI. Falls back to the
            `GOOGLE_CLOUD_PROJECT` environment variable.
        location: Google Cloud location for Vertex AI. Falls back to the
            `GOOGLE_CLOUD_LOCATION` environment variable.
        image_generation_model: Model id used by `generate_image`.
        video_generation_model: Model id used by `generate_video`.
        **kwargs: Forwarded unchanged to `Toolkit.__init__`.

    Raises:
        ValueError: If no API key is available in Gemini API mode, or if the
            GenAI client cannot be constructed.
    """
    super().__init__(name="gemini_tools", tools=[self.generate_image, self.generate_video], **kwargs)

    # Vertex AI mode is on when requested explicitly OR through the
    # GOOGLE_GENAI_USE_VERTEXAI environment variable. The video tool's error
    # message points users at that variable, so it has to be honoured here;
    # otherwise setting it would still demand an API key and leave video
    # generation disabled.
    env_vertexai = (getenv("GOOGLE_GENAI_USE_VERTEXAI") or "").strip().lower()
    self.vertexai = bool(vertexai) or env_vertexai in ("true", "1")
    self.project_id = project_id
    self.location = location

    # Load API key from argument or environment
    self.api_key = api_key or getenv("GOOGLE_API_KEY")
    if not self.vertexai and not self.api_key:
        log_error("GOOGLE_API_KEY not set. Please set the GOOGLE_API_KEY environment variable.")
        raise ValueError("GOOGLE_API_KEY not set. Please provide api_key or set the environment variable.")

    # Prepare client parameters: the two modes take disjoint credentials.
    client_params: dict[str, Any] = {}
    if self.vertexai:
        log_info("Using Vertex AI API")
        client_params["vertexai"] = True
        client_params["project"] = self.project_id or getenv("GOOGLE_CLOUD_PROJECT")
        client_params["location"] = self.location or getenv("GOOGLE_CLOUD_LOCATION")
    else:
        log_info("Using Gemini API")
        client_params["api_key"] = self.api_key

    try:
        self.client = Client(**client_params)
        log_debug("Google GenAI Client created successfully.")
    except Exception as e:
        log_error(f"Failed to create Google GenAI Client: {e}", exc_info=True)
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError(f"Failed to create Google GenAI Client. Error: {e}") from e

    self.image_model = image_generation_model
    self.video_model = video_generation_model
4265
4366 def generate_image (
4467 self ,
@@ -54,40 +77,89 @@ def generate_image(
5477 """
5578
5679 try :
57- response = self .client .models .generate_images (
80+ response : GenerateImagesResponse = self .client .models .generate_images (
5881 model = self .image_model ,
5982 prompt = prompt ,
6083 )
6184
6285 log_debug ("DEBUG: Raw Gemini API response" )
6386
64- image_bytes = None
65- actual_mime_type = "image/png"
87+ # Extract image bytes
88+ image_bytes = response .generated_images [0 ].image .image_bytes
89+ for generated_image in response .generated_images :
90+ image_bytes = generated_image .image .image_bytes
91+ if not image_bytes :
92+ log_error ("No valid image data extracted." )
93+ return "Failed to generate image: No valid image data extracted."
94+ base64_encoded_image_bytes = base64 .b64encode (image_bytes )
95+ actual_mime_type = "image/png"
96+
97+ media_id = str (uuid4 ())
98+ agent .add_image (
99+ ImageArtifact (
100+ id = media_id ,
101+ content = base64_encoded_image_bytes ,
102+ original_prompt = prompt ,
103+ mime_type = actual_mime_type ,
104+ )
105+ )
106+ log_debug (f"Successfully generated image { media_id } with model { self .image_model } " )
107+ return "Image generated successfully"
66108
67- if response .generated_images and response .generated_images [0 ].image .image_bytes :
68- image_bytes = response .generated_images [0 ].image .image_bytes
69- else :
70- log_error ("No image data found in the response structure." )
71- return "Failed to generate image: No valid image data extracted."
109+ except Exception as e :
110+ log_error (f"Failed to generate image: Client or method not available ({ e } )" )
111+ return f"Failed to generate image: Client or method not available ({ e } )"
72112
73- if image_bytes is None :
74- log_error ("image_bytes is None after extraction." )
75- return "Failed to generate image: No valid image data extracted."
def generate_video(
    self,
    agent: Agent,
    prompt: str,
) -> str:
    """Generate a video based on a text prompt.

    Every generated video that carries inline bytes is base64-encoded and
    attached to the calling agent as a `VideoArtifact`.

    Args:
        prompt (str): The text prompt to generate the video from.
    Returns:
        str: A message indicating success or failure.
    """
    # Video generation requires Vertex AI mode.
    if not self.vertexai:
        log_error("Video generation requires Vertex AI mode. Please enable Vertex AI mode.")
        return (
            "Video generation requires Vertex AI mode. "
            "Please set `vertexai=True` or environment variable `GOOGLE_GENAI_USE_VERTEXAI=true`."
        )

    from google.genai.types import GenerateVideosConfig

    try:
        operation: GenerateVideosOperation = self.client.models.generate_videos(
            model=self.video_model,
            prompt=prompt,
            config=GenerateVideosConfig(
                enhance_prompt=True,
            ),
        )

        # Video generation is a long-running operation: poll until it finishes.
        # NOTE(review): there is no upper bound on the wait; consider a timeout.
        while not operation.done:
            time.sleep(5)
            operation = self.client.operations.get(operation=operation)

        # A finished operation may have failed; surface that instead of
        # tripping over a missing result below.
        operation_error = getattr(operation, "error", None)
        if operation_error:
            log_error(f"Failed to generate video: {operation_error}")
            return f"Failed to generate video: {operation_error}"

        result = operation.result
        generated_videos = (result.generated_videos if result is not None else None) or []

        videos_added = 0
        for video in generated_videos:
            generated_video = video.video
            # Entries without inline bytes cannot be base64-encoded; skip them
            # rather than aborting the whole batch.
            if generated_video is None or not generated_video.video_bytes:
                log_error("Skipping generated video: no inline video bytes returned.")
                continue

            media_id = str(uuid4())
            encoded_video = base64.b64encode(generated_video.video_bytes).decode("utf-8")

            agent.add_video(
                VideoArtifact(
                    id=media_id,
                    content=encoded_video,
                    original_prompt=prompt,
                    mime_type=generated_video.mime_type,
                )
            )
            videos_added += 1
            log_debug(f"Successfully generated video {media_id} with model {self.video_model}")

        # Only report success when at least one video was actually attached.
        if videos_added == 0:
            log_error("No valid video data extracted.")
            return "Failed to generate video: No valid video data extracted."
        return "Video generated successfully"
    except Exception as e:
        log_error(f"Failed to generate video: {e}")
        return f"Failed to generate video: {e}"
0 commit comments