Skip to content

Commit 79681e3

Browse files
google-genai-bot authored and copybara-github committed
Write eval results locally from adk eval cli.
PiperOrigin-RevId: 762499588
1 parent 33921d5 commit 79681e3

File tree

2 files changed

+46
-10
lines changed

2 files changed

+46
-10
lines changed

src/google/adk/cli/cli_tools_click.py

Lines changed: 39 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,14 @@
1313
# limitations under the License.
1414

1515
import asyncio
16+
import collections
1617
from contextlib import asynccontextmanager
1718
from datetime import datetime
1819
import logging
1920
import os
2021
import tempfile
22+
from typing import AsyncGenerator
23+
from typing import Coroutine
2124
from typing import Optional
2225

2326
import click
@@ -27,6 +30,8 @@
2730
from . import cli_create
2831
from . import cli_deploy
2932
from .. import version
33+
from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
34+
from ..sessions.in_memory_session_service import InMemorySessionService
3035
from .cli import run_cli
3136
from .cli_eval import MISSING_EVAL_DEPENDENCIES_MESSAGE
3237
from .fast_api import get_fast_api_app
@@ -306,7 +311,7 @@ def cli_eval(
306311
EvalMetric(metric_name=metric_name, threshold=threshold)
307312
)
308313

309-
print(f"Using evaluation creiteria: {evaluation_criteria}")
314+
print(f"Using evaluation criteria: {evaluation_criteria}")
310315

311316
root_agent = get_root_agent(agent_module_file_path)
312317
reset_func = try_get_reset_func(agent_module_file_path)
@@ -325,21 +330,47 @@ def cli_eval(
325330
e for e in eval_set.eval_cases if e.eval_id in eval_case_ids
326331
]
327332

328-
eval_set_id_to_eval_cases[eval_set_file_path] = eval_cases
333+
eval_set_id_to_eval_cases[eval_set.eval_set_id] = eval_cases
329334

330335
async def _collect_eval_results() -> list[EvalCaseResult]:
331-
return [
332-
result
333-
async for result in run_evals(
334-
eval_set_id_to_eval_cases, root_agent, reset_func, eval_metrics
335-
)
336-
]
336+
session_service = InMemorySessionService()
337+
eval_case_results = []
338+
async for eval_case_result in run_evals(
339+
eval_set_id_to_eval_cases,
340+
root_agent,
341+
reset_func,
342+
eval_metrics,
343+
session_service=session_service,
344+
):
345+
eval_case_result.session_details = await session_service.get_session(
346+
app_name=os.path.basename(agent_module_file_path),
347+
user_id=eval_case_result.user_id,
348+
session_id=eval_case_result.session_id,
349+
)
350+
eval_case_results.append(eval_case_result)
351+
return eval_case_results
337352

338353
try:
339354
eval_results = asyncio.run(_collect_eval_results())
340355
except ModuleNotFoundError:
341356
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE)
342357

358+
# Write eval set results.
359+
local_eval_set_results_manager = LocalEvalSetResultsManager(
360+
agent_dir=os.path.dirname(agent_module_file_path)
361+
)
362+
eval_set_id_to_eval_results = collections.defaultdict(list)
363+
for eval_case_result in eval_results:
364+
eval_set_id = eval_case_result.eval_set_id
365+
eval_set_id_to_eval_results[eval_set_id].append(eval_case_result)
366+
367+
for eval_set_id, eval_case_results in eval_set_id_to_eval_results.items():
368+
local_eval_set_results_manager.save_eval_set_result(
369+
app_name=os.path.basename(agent_module_file_path),
370+
eval_set_id=eval_set_id,
371+
eval_case_results=eval_case_results,
372+
)
373+
343374
print("*********************************************************************")
344375
eval_run_summary = {}
345376

src/google/adk/evaluation/local_eval_set_results_manager.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
3030

3131

32+
def _sanitize_eval_set_result_name(eval_set_result_name: str) -> str:
33+
return eval_set_result_name.replace("/", "_")
34+
35+
3236
class LocalEvalSetResultsManager(EvalSetResultsManager):
3337
"""An EvalSetResult manager that stores eval set results locally on disk."""
3438

@@ -44,9 +48,10 @@ def save_eval_set_result(
4448
) -> None:
4549
"""Creates and saves a new EvalSetResult given eval_case_results."""
4650
timestamp = time.time()
47-
eval_set_result_name = app_name + "_" + eval_set_id + "_" + str(timestamp)
51+
eval_set_result_id = app_name + "_" + eval_set_id + "_" + str(timestamp)
52+
eval_set_result_name = _sanitize_eval_set_result_name(eval_set_result_id)
4853
eval_set_result = EvalSetResult(
49-
eval_set_result_id=eval_set_result_name,
54+
eval_set_result_id=eval_set_result_id,
5055
eval_set_result_name=eval_set_result_name,
5156
eval_set_id=eval_set_id,
5257
eval_case_results=eval_case_results,

0 commit comments

Comments
 (0)