Commit f938a1b

feat: support basic function call for gemini (google-generativeai) (#17696)
1 parent: ec4dc2c

File tree: 5 files changed (+215 / -44 lines)

llama-index-core/llama_index/core/agent/workflow/multi_agent_workflow.py

Lines changed: 27 additions & 1 deletion
@@ -51,6 +51,8 @@
 {msg}
 """
 
+DEFAULT_HANDOFF_OUTPUT_PROMPT = "Agent {to_agent} is now handling the request due to the following reason: {reason}.\nPlease continue with the current request."
+
 
 async def handoff(ctx: Context, to_agent: str, reason: str) -> str:
     """Handoff control of that chat to the given agent."""
@@ -61,7 +63,11 @@ async def handoff(ctx: Context, to_agent: str, reason: str) -> str:
         return f"Agent {to_agent} not found. Please select a valid agent to hand off to. Valid agents: {valid_agents}"
 
     await ctx.set("next_agent", to_agent)
-    return f"Agent {to_agent} is now handling the request due to the following reason: {reason}.\nPlease continue."
+    handoff_output_prompt = await ctx.get(
+        "handoff_output_prompt", default=DEFAULT_HANDOFF_OUTPUT_PROMPT
+    )
+
+    return handoff_output_prompt.format(to_agent=to_agent, reason=reason)
 
 
 class AgentWorkflowMeta(WorkflowMeta, ABCMeta):
@@ -77,6 +83,7 @@ def __init__(
         initial_state: Optional[Dict] = None,
         root_agent: Optional[str] = None,
         handoff_prompt: Optional[Union[str, BasePromptTemplate]] = None,
+        handoff_output_prompt: Optional[Union[str, BasePromptTemplate]] = None,
         state_prompt: Optional[Union[str, BasePromptTemplate]] = None,
         timeout: Optional[float] = None,
         **workflow_kwargs: Any,
@@ -106,6 +113,18 @@ def __init__(
            raise ValueError("Handoff prompt must contain {agent_info}")
        self.handoff_prompt = handoff_prompt
 
+        handoff_output_prompt = handoff_output_prompt or DEFAULT_HANDOFF_OUTPUT_PROMPT
+        if isinstance(handoff_output_prompt, str):
+            handoff_output_prompt = PromptTemplate(handoff_output_prompt)
+        if (
+            "{to_agent}" not in handoff_output_prompt.get_template()
+            or "{reason}" not in handoff_output_prompt.get_template()
+        ):
+            raise ValueError(
+                "Handoff output prompt must contain {to_agent} and {reason}"
+            )
+        self.handoff_output_prompt = handoff_output_prompt
+
        state_prompt = state_prompt or DEFAULT_STATE_PROMPT
        if isinstance(state_prompt, str):
            state_prompt = PromptTemplate(state_prompt)
@@ -120,6 +139,7 @@ def _get_prompts(self) -> PromptDictType:
        """Get prompts."""
        return {
            "handoff_prompt": self.handoff_prompt,
+            "handoff_output_prompt": self.handoff_output_prompt,
            "state_prompt": self.state_prompt,
        }
 
@@ -131,6 +151,8 @@ def _update_prompts(self, prompts_dict: PromptDictType) -> None:
        """Update prompts."""
        if "handoff_prompt" in prompts_dict:
            self.handoff_prompt = prompts_dict["handoff_prompt"]
+        if "handoff_output_prompt" in prompts_dict:
+            self.handoff_output_prompt = prompts_dict["handoff_output_prompt"]
        if "state_prompt" in prompts_dict:
            self.state_prompt = prompts_dict["state_prompt"]
 
@@ -203,6 +225,10 @@ async def _init_context(self, ctx: Context, ev: StartEvent) -> None:
            await ctx.set("state", self.initial_state)
        if not await ctx.get("current_agent_name", default=None):
            await ctx.set("current_agent_name", self.root_agent)
+        if not await ctx.get("handoff_output_prompt", default=None):
+            await ctx.set(
+                "handoff_output_prompt", self.handoff_output_prompt.get_template()
+            )
 
    async def _call_tool(
        self,
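For reference, a minimal sketch of how the new handoff_output_prompt argument can be passed to AgentWorkflow. Only agents, root_agent, and handoff_output_prompt (with its required {to_agent} and {reason} placeholders) come from this diff; the two FunctionAgent definitions and the model name are illustrative assumptions, not part of the commit.

from llama_index.core.agent.workflow import AgentWorkflow, FunctionAgent
from llama_index.llms.gemini import Gemini

# Hypothetical agents for illustration; any workflow agents would do.
llm = Gemini(model="models/gemini-1.5-flash")  # model name is an assumption

researcher = FunctionAgent(
    name="researcher",
    description="Gathers background information, then hands off to the writer.",
    system_prompt="Research the topic, then hand off to the writer agent.",
    llm=llm,
)
writer = FunctionAgent(
    name="writer",
    description="Writes the final answer for the user.",
    system_prompt="Write the final response.",
    llm=llm,
)

workflow = AgentWorkflow(
    agents=[researcher, writer],
    root_agent="researcher",
    # The template must contain both {to_agent} and {reason};
    # otherwise __init__ raises ValueError (see the validation above).
    handoff_output_prompt=(
        "Control was handed to {to_agent} because: {reason}.\n"
        "Continue working on the user's request."
    ),
)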

llama-index-integrations/llms/llama-index-llms-gemini/llama_index/llms/gemini/base.py

Lines changed: 121 additions & 33 deletions
@@ -2,7 +2,8 @@
 
 import os
 import warnings
-from typing import Any, Dict, Optional, Sequence, cast
+import uuid
+from typing import TYPE_CHECKING, Union, List, Any, Dict, Optional, Sequence, cast
 
 import google.generativeai as genai
 from google.generativeai.types import generation_types
@@ -13,15 +14,17 @@
     ChatResponseGen,
     CompletionResponse,
     CompletionResponseGen,
+    CompletionResponseAsyncGen,
     LLMMetadata,
+    MessageRole,
 )
 from llama_index.core.bridge.pydantic import Field, PrivateAttr
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.constants import DEFAULT_NUM_OUTPUTS, DEFAULT_TEMPERATURE
+from llama_index.core.llms.llm import ToolSelection
 from llama_index.core.llms.callbacks import llm_chat_callback, llm_completion_callback
-from llama_index.core.llms.custom import CustomLLM
+from llama_index.core.llms.function_calling import FunctionCallingLLM
 from llama_index.core.utilities.gemini_utils import (
-    ROLES_FROM_GEMINI,
     merge_neighboring_same_role_messages,
 )
 
@@ -49,8 +52,11 @@
     "gemini-1.0-pro",
 )
 
+if TYPE_CHECKING:
+    from llama_index.core.tools.types import BaseTool
 
-class Gemini(CustomLLM):
+
+class Gemini(FunctionCallingLLM):
     """
     Gemini LLM.
 
@@ -181,6 +187,8 @@ def metadata(self) -> LLMMetadata:
            num_output=self.max_tokens,
            model_name=self.model,
            is_chat_model=True,
+            # All gemini models support function calling
+            is_function_calling_model=True,
        )
 
    @llm_completion_callback()
@@ -208,10 +216,30 @@ def stream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseGen:
        request_options = self._request_options or kwargs.pop("request_options", None)
-        it = self._model.generate_content(
-            prompt, stream=True, request_options=request_options, **kwargs
-        )
-        yield from map(completion_from_gemini_response, it)
+
+        def gen():
+            it = self._model.generate_content(
+                prompt, stream=True, request_options=request_options, **kwargs
+            )
+            for r in it:
+                yield completion_from_gemini_response(r)
+
+        return gen()
+
+    @llm_completion_callback()
+    def astream_complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponseAsyncGen:
+        request_options = self._request_options or kwargs.pop("request_options", None)
+
+        async def gen():
+            it = await self._model.generate_content_async(
+                prompt, stream=True, request_options=request_options, **kwargs
+            )
+            async for r in it:
+                yield completion_from_gemini_response(r)
+
+        return gen()
 
    @llm_chat_callback()
    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
@@ -256,19 +284,11 @@ def gen() -> ChatResponseGen:
            for r in response:
                top_candidate = r.candidates[0]
                content_delta = top_candidate.content.parts[0].text
-                role = ROLES_FROM_GEMINI[top_candidate.content.role]
-                raw = {
-                    **(type(top_candidate).to_dict(top_candidate)),  # type: ignore
-                    **(
-                        type(response.prompt_feedback).to_dict(response.prompt_feedback)  # type: ignore
-                    ),
-                }
                content += content_delta
-                yield ChatResponse(
-                    message=ChatMessage(role=role, content=content),
-                    delta=content_delta,
-                    raw=raw,
-                )
+                llama_resp = chat_from_gemini_response(r)
+                llama_resp.delta = content_delta
+                llama_resp.message.content = content
+                yield llama_resp
 
        return gen()
 
@@ -278,7 +298,7 @@ async def astream_chat(
    ) -> ChatResponseAsyncGen:
        request_options = self._request_options or kwargs.pop("request_options", None)
        merged_messages = merge_neighboring_same_role_messages(messages)
-        *history, next_msg = map(chat_message_to_gemini, messages)
+        *history, next_msg = map(chat_message_to_gemini, merged_messages)
        chat = self._model.start_chat(history=history)
        response = await chat.send_message_async(
            next_msg, stream=True, request_options=request_options, **kwargs
@@ -289,18 +309,86 @@ async def gen() -> ChatResponseAsyncGen:
            async for r in response:
                top_candidate = r.candidates[0]
                content_delta = top_candidate.content.parts[0].text
-                role = ROLES_FROM_GEMINI[top_candidate.content.role]
-                raw = {
-                    **(type(top_candidate).to_dict(top_candidate)),  # type: ignore
-                    **(
-                        type(response.prompt_feedback).to_dict(response.prompt_feedback)  # type: ignore
-                    ),
-                }
                content += content_delta
-                yield ChatResponse(
-                    message=ChatMessage(role=role, content=content),
-                    delta=content_delta,
-                    raw=raw,
-                )
+                llama_resp = chat_from_gemini_response(r)
+                llama_resp.delta = content_delta
+                llama_resp.message.content = content
+                yield llama_resp
 
        return gen()
+
+    def _prepare_chat_with_tools(
+        self,
+        tools: Sequence["BaseTool"],
+        user_msg: Optional[Union[str, ChatMessage]] = None,
+        chat_history: Optional[List[ChatMessage]] = None,
+        verbose: bool = False,
+        allow_parallel_tool_calls: bool = False,
+        tool_choice: Union[str, dict] = "auto",
+        strict: Optional[bool] = None,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """Predict and call the tool."""
+        from google.generativeai.types import FunctionDeclaration, ToolDict
+
+        tool_declarations = []
+        for tool in tools:
+            descriptions = {}
+            for param_name, param_schema in tool.metadata.get_parameters_dict()[
+                "properties"
+            ].items():
+                param_description = param_schema.get("description", None)
+                if param_description:
+                    descriptions[param_name] = param_description
+
+            tool.metadata.fn_schema.__doc__ = tool.metadata.description
+            tool_declarations.append(
+                FunctionDeclaration.from_function(tool.metadata.fn_schema, descriptions)
+            )
+
+        if isinstance(user_msg, str):
+            user_msg = ChatMessage(role=MessageRole.USER, content=user_msg)
+
+        messages = chat_history or []
+        if user_msg:
+            messages.append(user_msg)
+
+        return {
+            "messages": messages,
+            "tools": ToolDict(function_declarations=tool_declarations)
+            if tool_declarations
+            else None,
+            **kwargs,
+        }
+
+    def get_tool_calls_from_response(
+        self,
+        response: ChatResponse,
+        error_on_no_tool_call: bool = True,
+        **kwargs: Any,
+    ) -> List[ToolSelection]:
+        """Predict and call the tool."""
+        tool_calls = response.message.additional_kwargs.get("tool_calls", [])
+
+        if len(tool_calls) < 1:
+            if error_on_no_tool_call:
+                raise ValueError(
+                    f"Expected at least one tool call, but got {len(tool_calls)} tool calls."
+                )
+            else:
+                return []
+
+        tool_selections = []
+        for tool_call in tool_calls:
+            if not isinstance(tool_call, genai.protos.FunctionCall):
+                raise ValueError("Invalid tool_call object")
+
+            tool_selections.append(
+                ToolSelection(
+                    tool_id=str(uuid.uuid4()),
+                    tool_name=tool_call.name,
+                    tool_kwargs=dict(tool_call.args),
                )
+            )
+
+        return tool_selections
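Together, _prepare_chat_with_tools and get_tool_calls_from_response let Gemini participate in the standard FunctionCallingLLM flow. A minimal sketch of that flow, assuming an illustrative multiply tool and model name (neither is part of this commit):

from llama_index.core.tools import FunctionTool
from llama_index.llms.gemini import Gemini

def multiply(a: int, b: int) -> int:
    """Multiply two integers and return the result."""
    return a * b

llm = Gemini(model="models/gemini-1.5-flash")  # model name is an assumption
tool = FunctionTool.from_defaults(fn=multiply)

# chat_with_tools() (inherited from FunctionCallingLLM) goes through the new
# _prepare_chat_with_tools(), which converts each tool's fn_schema into a
# google.generativeai FunctionDeclaration.
response = llm.chat_with_tools(tools=[tool], user_msg="What is 6 times 7?")

# get_tool_calls_from_response() reads additional_kwargs["tool_calls"] and wraps
# each genai FunctionCall in a ToolSelection with a freshly generated tool_id.
for call in llm.get_tool_calls_from_response(response, error_on_no_tool_call=False):
    print(call.tool_name, call.tool_kwargs)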

llama-index-integrations/llms/llama-index-llms-gemini/llama_index/llms/gemini/utils.py

Lines changed: 25 additions & 3 deletions
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Dict, Any
 
 import google.ai.generativelanguage as glm
 import google.generativeai as genai
@@ -68,13 +68,35 @@ def chat_from_gemini_response(
        response.usage_metadata
    )
    role = ROLES_FROM_GEMINI[top_candidate.content.role]
-    return ChatResponse(message=ChatMessage(role=role, content=response.text), raw=raw)
+    try:
+        # When the response contains only a function call, the library
+        # raises an exception.
+        # The easiest way to detect this is to try access the text attribute and
+        # catch the exception.
+        # https://github.com/google-gemini/generative-ai-python/issues/670
+        text = response.text
+    except (ValueError, AttributeError):
+        text = None
+
+    additional_kwargs: Dict[str, Any] = {}
+    for part in response.parts:
+        if fn := part.function_call:
+            if "tool_calls" not in additional_kwargs:
+                additional_kwargs["tool_calls"] = []
+            additional_kwargs["tool_calls"].append(fn)
+
+    return ChatResponse(
+        message=ChatMessage(
+            role=role, content=text, additional_kwargs=additional_kwargs
+        ),
+        raw=raw,
+        additional_kwargs=additional_kwargs,
+    )
 
 
 def chat_message_to_gemini(message: ChatMessage) -> "genai.types.ContentDict":
    """Convert ChatMessages to Gemini-specific history, including ImageDocuments."""
    parts = []
-    content_txt = ""
    for block in message.blocks:
        if isinstance(block, TextBlock):
            parts.append(block.text)
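The change above is what surfaces tool calls to callers: a function-call-only Gemini reply no longer raises when response.text is read, and the raw FunctionCall objects end up in additional_kwargs["tool_calls"]. A small sketch of what that looks like on the resulting ChatResponse, with an illustrative tool and prompt:

from llama_index.core.tools import FunctionTool
from llama_index.llms.gemini import Gemini

def get_weather(city: str) -> str:
    """Return a canned weather report for a city."""
    return f"It is sunny in {city}."

llm = Gemini(model="models/gemini-1.5-flash")  # model name is an assumption
resp = llm.chat_with_tools(
    tools=[FunctionTool.from_defaults(fn=get_weather)],
    user_msg="What's the weather in Paris?",
)

# If Gemini answers with only a function call, message.content is None and the
# genai.protos.FunctionCall objects are available under "tool_calls".
print(resp.message.content)
print(resp.message.additional_kwargs.get("tool_calls", []))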

llama-index-integrations/llms/llama-index-llms-gemini/pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ exclude = ["**/BUILD"]
 license = "MIT"
 name = "llama-index-llms-gemini"
 readme = "README.md"
-version = "0.4.6"
+version = "0.4.7"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
