Replies: 3 comments
-
That's a good suggestion. @polong-lin we can consider adding some of these patterns. |
Beta Was this translation helpful? Give feedback.
-
Hi @XinyueZ , if you can share your code we can help taking a look. |
Beta Was this translation helpful? Give feedback.
-
Hey @boyangsvl I define a //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// import asyncio
import os
from typing import Any, AsyncGenerator, Dict, Optional
from dotenv import load_dotenv
from google.adk.agents import Agent, LiveRequest, LiveRequestQueue
from google.adk.agents.callback_context import CallbackContext
from google.adk.agents.run_config import RunConfig, StreamingMode
from google.adk.models import LlmRequest, LlmResponse
from google.adk.runners import Event, Runner
from google.adk.sessions import InMemorySessionService, Session
from google.adk.tools.agent_tool import AgentTool
from google.adk.tools.base_tool import BaseTool
from google.adk.tools.google_search_tool import google_search
from google.adk.tools.tool_context import ToolContext
from google.genai import types
from loguru import logger
from pydantic import BaseModel, Field
from rich.console import Console
from rich.markdown import Markdown
load_dotenv()

# Vertex AI backend configuration; the ADK/GenAI clients read these
# environment variables at run time.
# BUGFIX: os.getenv() returns None when GOOGLE_CLOUD_PROJECT is unset, and
# assigning None into os.environ raises an opaque TypeError. Fail fast with
# an actionable message instead.
_project = os.getenv("GOOGLE_CLOUD_PROJECT")
if not _project:
    raise RuntimeError(
        "GOOGLE_CLOUD_PROJECT is not set; define it in the environment or in .env"
    )
os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1"  # os.getenv("GOOGLE_CLOUD_REGION")
os.environ["GOOGLE_CLOUD_PROJECT"] = _project
os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True"

# Identity constants for the in-memory session below.
APP_NAME = "podcast_agent_app"
USER_ID = "root_podcast_agent_user"
SESSION_ID_AGENT = "session_root_podcast_agent"

# One model name per pipeline stage. All default to the fast flash model;
# the podcast producer uses the live (bidi audio) model and the supervisor
# uses a 2.5 preview model.
WEB_SEARCH_MODEL = "gemini-2.0-flash"
PLAN_MODEL = "gemini-2.0-flash"
DRAFT_MODEL = "gemini-2.0-flash"
REVIEW_MODEL = "gemini-2.0-flash"
FINAL_MODEL = "gemini-2.0-flash"
PRODUCER_MODEL = "gemini-2.0-flash"
PRODUCER_PODCAST_MODEL = "gemini-2.0-flash-live-preview-04-09"
SUPERVISOR_MODEL = "gemini-2.5-flash-preview-04-17"
NOTE_DOWN_MODEL = "gemini-2.0-flash"
LANGUAGE_CODE = "en-US"

# Shared in-memory session store used by every Runner in this script.
session_service = InMemorySessionService()
# NOTE(review): in recent google-adk releases create_session is a coroutine —
# confirm against the installed version.
session = session_service.create_session(
    app_name=APP_NAME,
    user_id=USER_ID,
    session_id=SESSION_ID_AGENT,
)
def _plain_text(text: str) -> None:
    """Surface a plain status message to the user.

    Streamlit rendering (st.success with a ✨ icon) is disabled; the message
    currently goes to the loguru logger only.
    """
    logger.info(text)
def _model_text(text: str) -> None:
    """Surface a model-lifecycle status message.

    Streamlit rendering (st.success with a 🧠 icon) is disabled; the message
    currently goes to the loguru logger only.
    """
    logger.info(text)
def _agent_text(text: str) -> None:
    """Surface an agent-lifecycle status message.

    Streamlit rendering (st.success with a 🤖 icon) is disabled; the message
    currently goes to the loguru logger only.
    """
    logger.info(text)
def _tool_text(text: str) -> None:
    """Surface a tool-lifecycle status message.

    Streamlit rendering (st.success with a 💡 icon) is disabled; the message
    currently goes to the loguru logger only.
    """
    logger.info(text)
def on_before_agent(callback_context: CallbackContext) -> Optional[types.Content]:
    """Before-agent callback: announce the agent that is about to run.

    Returns:
        None, which tells the ADK to proceed with the agent's normal run.
    """
    _agent_text(
        f"Agent `{callback_context.agent_name}` is being checked whether should start..."
    )
    return None
def on_after_agent(callback_context: CallbackContext) -> Optional[types.Content]:
    """After-agent callback: log that the agent's run has completed.

    Args:
        callback_context: Context carrying the name of the agent that ran.

    Returns:
        None, so the content produced by the agent is kept unmodified.
    """
    agent_name = callback_context.agent_name
    # BUGFIX: this callback fires *after* the agent ran; the old message
    # ("started..") described the wrong lifecycle stage.
    _agent_text(f"Agent `{agent_name}` finished.")
    return None
def on_before_model_modifier(
    callback_context: CallbackContext, llm_request: LlmRequest
) -> Optional[LlmResponse]:
    """Before-model callback: announce that an LLM call is starting.

    Returns:
        None, leaving the outgoing request untouched so the call proceeds.
    """
    _model_text(f"Agent `{callback_context.agent_name}` is thinking...")
    return None
def on_after_model_modifier(
    callback_context: CallbackContext, llm_response: LlmResponse
) -> Optional[LlmResponse]:
    """After-model callback: announce that the LLM call returned.

    Returns:
        None, so the model's response is used unmodified.
    """
    _model_text(f"Agent `{callback_context.agent_name}` has finished thinking.")
    return None
def on_before_tool_modifier(
    tool: BaseTool, args: Dict[str, Any], tool_context: ToolContext
) -> Optional[Dict]:
    """Before-tool callback: announce the tool invocation.

    Returns:
        None, which keeps the original tool arguments unchanged.
    """
    _tool_text(
        f"Tool `{tool.name}` used with agent `{tool_context.agent_name}`, please wait..."
    )
    return None
def on_after_tool_modifier(
    tool: BaseTool, args: Dict[str, Any], tool_context: ToolContext, tool_response: Dict
) -> Optional[Dict]:
    """After-tool callback: log that the tool call has completed.

    Args:
        tool: The tool that was just executed.
        args: The arguments the tool was invoked with (unused here).
        tool_context: Context carrying the invoking agent's name.
        tool_response: The tool's raw response (unused here).

    Returns:
        None, so the original tool_response is used unmodified.
    """
    agent_name = tool_context.agent_name
    tool_name = tool.name
    # BUGFIX: this callback fires *after* the tool ran; the old message
    # ("please wait, I will give answer") implied it was still in progress.
    _tool_text(f"Tool `{tool_name}` finished for agent `{agent_name}`.")
    return None
def note_down(result: str) -> dict:
    """Note down the result in markdown format.

    Renders the markdown to a recording, soft-wrapping rich console and also
    surfaces the raw text through the plain-text status channel.

    Args:
        result (str): The result in markdown format.

    Returns:
        dict: {"status": "success", "content": <the original text>}
    """
    # BUGFIX: was logger.debug(f"Noting result") — an f-string with no
    # placeholders and no useful information; log the payload size instead
    # (loguru brace-style lazy formatting).
    logger.debug("Noting down result ({} chars)", len(result))
    console = Console(record=True, soft_wrap=True)
    md = Markdown(result, justify="left")
    console.print(md)
    _plain_text(result)
    return {"status": "success", "content": result}
# Wrap a search-capable LLM agent as a tool (AgentTool) so other agents can
# call it like any regular tool and receive its answer as the tool result.
web_search_tool = AgentTool(
    agent=Agent(
        model=WEB_SEARCH_MODEL,
        name="web_search_tool",
        description="An agent providing web search grounding capability",
        instruction="""Answer the user's question directly using web search grounding tool;
Provide a brief but concise response.
Rather than a detail response. Do not ask the user to check or look up information for themselves, that's your role; do your best to be informative.""",
        # google_search provides the actual web grounding capability.
        tools=[google_search],
    )
)
class PodcastProducerAgentInputSchema(BaseModel):
    """Input schema for `podcast_producer_agent`: transcript plus host info."""

    # Full dialogue text the audio is produced from.
    transcript: str = Field(
        description="The transcript of the conversation between the hosts: contains the transcript of the conversation between the hosts."
    )
    # Free-text description of the first host (per the producer's instruction:
    # name, gender, style, etc.).
    host_1: str = Field(
        description="The host 1 of the podcast: contains the host 1 of the podcast."
    )
    # Free-text description of the second host.
    host_2: str = Field(
        description="The host 2 of the podcast: contains the host 2 of the podcast."
    )
# Live (bidi-streaming) agent that voices the final podcast. It runs on the
# live-preview model and returns AUDIO instead of text; produce_podcast()
# below drives it via Runner.run_live.
podcast_producer_agent = Agent(
    name="podcast_producer_agent",
    model=PRODUCER_PODCAST_MODEL,
    description="""A highly professional agent for generating podcast audio from transcript and host information.""",
    instruction="""You are a professional podcast audio producer. Your task is to read aloud only the conversation between the two hosts, strictly following the provided transcript and host information.
Input:
- transcript: The full conversation transcript between two hosts, in plain text. Only the conversation should be used.
- host_1: Information about the first host (e.g., name, gender, style, etc).
- host_2: Information about the second host (e.g., name, gender, style, etc).
Instructions:
1. Read out only the conversation between the hosts. Do NOT include any other information, such as title, music, intro, outro, or prompts.
2. For each line, use the host information—especially gender, personality, and style—to mimic the appropriate tone, voice, and speaking style of the speaking host. Do NOT explicitly say the host's name; use only natural voice acting to distinguish speakers.
3. The output must be a natural, professional podcast conversation, with clear distinction between the two hosts' voices and personalities through voice acting and tone, not by narration or labels.
4. Absolutely do NOT add or read any content other than the hosts' conversation. No system messages, labels, or extra narration.
5. The result should sound like a real podcast, strictly limited to the hosts' dialogue, and the listener can distinguish the speakers by their voice and style.
""",
    input_schema=PodcastProducerAgentInputSchema,
    # Sampling config with AUDIO output. Only the language is pinned in
    # speech_config — no explicit voice selection here, so speaker
    # distinction relies entirely on the prompt above.
    generate_content_config=types.GenerateContentConfig(
        top_p=1.0,
        temperature=1.0,
        top_k=40,
        response_modalities=["AUDIO"],
        speech_config=types.SpeechConfig(
            language_code=LANGUAGE_CODE,
        ),
    ),
    # Lifecycle logging hooks (defined above). All return None, so they only
    # observe — they never alter requests, responses, or tool calls.
    before_agent_callback=on_before_agent,
    after_agent_callback=on_after_agent,
    before_model_callback=on_before_model_modifier,
    after_model_callback=on_after_model_modifier,
    before_tool_callback=on_before_tool_modifier,
    after_tool_callback=on_after_tool_modifier,
)
async def produce_podcast(transcript: str, host_1: str, host_2: str):
    """Produce podcast audio from transcript and host information.

    Streams the transcript through the live podcast producer agent, collects
    the returned audio chunks, and writes them to ./output/adk_podcast.wav as
    16-bit mono PCM at 24 kHz.

    Args:
        transcript (str): The transcript of the podcast.
        host_1 (str): The host 1 of the podcast.
        host_2 (str): The host 2 of the podcast.
    """

    async def call_agent_audio_async(
        session: Session,
        live_request_queue: LiveRequestQueue,
        run_config: RunConfig,
        user_input: str,
    ) -> bytes:
        """Send user_input into the live session and gather the audio reply."""

        def has_inline_data(event: Event) -> bool:
            # An event carries audio when its first content part has inline bytes.
            return (
                event.content
                and event.content.parts
                and len(event.content.parts) > 0
                and event.content.parts[0].inline_data is not None
            )

        async def client2agent(live_request_queue: LiveRequestQueue, user_input: str):
            """Send the user's text to the agent over the live request queue."""
            content = types.Content(
                role="user", parts=[types.Part.from_text(text=user_input)]
            )
            request = LiveRequest(content=content, close=False)
            live_request_queue.send(request)

        async def agent2client(live_events: AsyncGenerator[Event, None]) -> bytes:
            """Accumulate inline audio bytes until the agent's turn completes."""
            audio_data = b""
            async for event in live_events:
                if event.turn_complete:
                    break
                if has_inline_data(event):
                    logger.debug("🎶")
                    audio_data += event.content.parts[0].inline_data.data
            # BUGFIX: previously this implicitly returned None when the event
            # stream ended without a turn_complete event, which would crash
            # wave.writeframes() downstream. Always return the collected bytes.
            return audio_data

        live_events = runner.run_live(
            session=session,
            live_request_queue=live_request_queue,
            run_config=run_config,
        )
        # Run sender and receiver concurrently; the receiver ends when the
        # agent signals turn completion (or the stream ends).
        task_client2agent = asyncio.create_task(
            client2agent(live_request_queue=live_request_queue, user_input=user_input)
        )
        task_agent2client = asyncio.create_task(agent2client(live_events))
        await asyncio.gather(task_client2agent, task_agent2client)
        return task_agent2client.result()

    runner = Runner(
        agent=podcast_producer_agent,
        app_name=APP_NAME,
        session_service=session_service,
    )
    live_request_queue = LiveRequestQueue()
    user_input = f"""I have podcast transcript and additional information for you to generate the final podcast:
host_1: {host_1}
host_2: {host_2}
transcript:
---Start of transcript---
{transcript}
---End of transcript---
"""
    audio_data = await call_agent_audio_async(
        session=session,
        live_request_queue=live_request_queue,
        run_config=RunConfig(
            response_modalities=["AUDIO"],
            streaming_mode=StreamingMode.NONE,
            speech_config=types.SpeechConfig(
                language_code=LANGUAGE_CODE,
            ),
        ),
        user_input=user_input,
    )
    live_request_queue.close()

    import wave

    # BUGFIX: wave.open() raises FileNotFoundError if ./output does not
    # exist; create it first.
    os.makedirs("./output", exist_ok=True)
    with wave.open("./output/adk_podcast.wav", "wb") as wf:
        wf.setnchannels(1)  # mono
        wf.setsampwidth(2)  # 16-bit samples
        wf.setframerate(24000)  # assumes 24 kHz live audio output — TODO confirm
        wf.writeframes(audio_data)
    logger.success("Audio saved to ./output/adk_podcast.wav")
class NoteDownAgentInputSchema(BaseModel):
    """Input schema for `note_down_agent`: a single free-text field to record."""

    text: str = Field(description="The text of information that can note down")
# Agent whose only job is to persist/display text via the note_down tool.
note_down_agent = Agent(
    name="note_down_agent",
    model=NOTE_DOWN_MODEL,
    description="""Agent for noting down the information using `note_down` tool.""",
    instruction="""As `note_down_agent`, you will note down the input information in plain text format.
- Call the `note_down` tool to complete the note.
- Do not perform any other actions.
""",
    input_schema=NoteDownAgentInputSchema,
    generate_content_config=types.GenerateContentConfig(
        top_p=1.0,
        temperature=1.0,
        top_k=40,
        response_modalities=["TEXT"],
        # NOTE(review): speech_config alongside a TEXT-only response modality
        # looks unused — confirm whether it can be dropped.
        speech_config=types.SpeechConfig(
            language_code=LANGUAGE_CODE,
        ),
    ),
    tools=[note_down],
    # Lifecycle logging hooks (defined above); all return None and only observe.
    before_agent_callback=on_before_agent,
    after_agent_callback=on_after_agent,
    before_model_callback=on_before_model_modifier,
    after_model_callback=on_after_model_modifier,
    before_tool_callback=on_before_tool_modifier,
    after_tool_callback=on_after_tool_modifier,
)
class PlanAgentInputSchema(BaseModel):
    """Input schema for `plan_agent` (podcast planning)."""

    # Origin material for the episode; free-form ("this can be anything").
    topic_source: str = Field(
        description="The topic source: contains the origin information of the topic, this can be anything."
    )
    # Desired kind/style of podcast to derive from the topic source.
    podcast_flavor: str = Field(
        description="The podcast flavor: contains what kind of podcast should be created based on the `topic_source`."
    )
    host_1: str = Field(description="The host 1: contains the host 1 of the podcast.")
    host_2: str = Field(description="The host 2: contains the host 2 of the podcast.")
    # Output language of the planned podcast.
    language: str = Field(
        description="The language of the podcast: contains the language of the podcast."
    )
plan_agent = Agent(
name="plan_agent",
model=PLAN_MODEL,
description="""Agent for planning the podcast.""",
instruction="""As `plan_agent`, you will plan the podcast in plain text format.
You get input from user and try to make tha plan for a podcast with 2 people, one male and one female.
In order to make a plan, you will have the scheme of {
{
{
{
{
{
|
Beta Was this translation helpful? Give feedback.
Uh oh!
There was an error while loading. Please reload this page.
-
Hey,
I have tried to use
sub_agents
to implement the supervisor, however, no matter how I write prompts in the supervisor, it only executes the first agent and does not pass information to the other agents. Essentially, this is about the handoff in the transfer between different agents.
Do we need to implement a handoff tool?
Langchain, llama-index, and openai agent SDK all have similar implementations. I would like to ask if ADK has any tips in this regard.
Thank you.
Beta Was this translation helpful? Give feedback.
All reactions