Skip to content

Commit b576689

Browse files
seanzhougooglecopybara-github
authored andcommitted
refactor: refactor evaluation to make the cli module depend on the evaluation module. Modules outside of the cli module should not reference the cli module
PiperOrigin-RevId: 763577749
1 parent 178b18d commit b576689

File tree

6 files changed

+178
-92
lines changed

6 files changed

+178
-92
lines changed

src/google/adk/cli/cli_eval.py

Lines changed: 6 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from __future__ import annotations
16+
1517
import importlib.util
1618
import json
1719
import logging
@@ -22,98 +24,20 @@
2224
from typing import Optional
2325
import uuid
2426

25-
from pydantic import Field
26-
2727
from ..agents import Agent
2828
from ..artifacts.base_artifact_service import BaseArtifactService
2929
from ..evaluation.eval_case import EvalCase
30-
from ..evaluation.eval_case import Invocation
30+
from ..evaluation.eval_metrics import EvalMetric
31+
from ..evaluation.eval_metrics import EvalMetricResult
32+
from ..evaluation.eval_metrics import EvalMetricResultPerInvocation
33+
from ..evaluation.eval_result import EvalCaseResult
3134
from ..evaluation.evaluator import EvalStatus
3235
from ..evaluation.evaluator import Evaluator
3336
from ..sessions.base_session_service import BaseSessionService
34-
from ..sessions.session import Session
35-
from .utils import common
3637

3738
logger = logging.getLogger("google_adk." + __name__)
3839

3940

40-
class EvalMetric(common.BaseModel):
41-
"""A metric used to evaluate a particular aspect of an eval case."""
42-
43-
metric_name: str
44-
"""The name of the metric."""
45-
46-
threshold: float
47-
"""A threshold value. Each metric decides how to interpret this threshold."""
48-
49-
50-
class EvalMetricResult(EvalMetric):
51-
"""The actual computed score/value of a particular EvalMetric."""
52-
53-
score: Optional[float] = None
54-
eval_status: EvalStatus
55-
56-
57-
class EvalMetricResultPerInvocation(common.BaseModel):
58-
"""Eval metric results per invocation."""
59-
60-
actual_invocation: Invocation
61-
"""The actual invocation, usually obtained by inferencing the agent."""
62-
63-
expected_invocation: Invocation
64-
"""The expected invocation, usually the reference or golden invocation."""
65-
66-
eval_metric_results: list[EvalMetricResult] = []
67-
"""Eval results for each applicable metric."""
68-
69-
70-
class EvalCaseResult(common.BaseModel):
71-
"""Case-level evaluation results."""
72-
73-
eval_set_file: str = Field(
74-
deprecated=True,
75-
description="This field is deprecated, use eval_set_id instead.",
76-
)
77-
eval_set_id: str = ""
78-
"""The eval set id."""
79-
80-
eval_id: str = ""
81-
"""The eval case id."""
82-
83-
final_eval_status: EvalStatus
84-
"""Final eval status for this eval case."""
85-
86-
eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field(
87-
deprecated=True,
88-
description=(
89-
"This field is deprecated, use overall_eval_metric_results instead."
90-
),
91-
)
92-
93-
overall_eval_metric_results: list[EvalMetricResult]
94-
"""Overall result for each metric for the entire eval case."""
95-
96-
eval_metric_result_per_invocation: list[EvalMetricResultPerInvocation]
97-
"""Result for each metric on a per invocation basis."""
98-
99-
session_id: str
100-
"""Session id of the session generated as result of inferencing/scraping stage of the eval."""
101-
102-
session_details: Optional[Session] = None
103-
"""Session generated as result of inferencing/scraping stage of the eval."""
104-
105-
user_id: Optional[str] = None
106-
"""User id used during inferencing/scraping stage of the eval."""
107-
108-
109-
class EvalSetResult(common.BaseModel):
110-
eval_set_result_id: str
111-
eval_set_result_name: str
112-
eval_set_id: str
113-
eval_case_results: list[EvalCaseResult] = Field(default_factory=list)
114-
creation_timestamp: float = 0.0
115-
116-
11741
MISSING_EVAL_DEPENDENCIES_MESSAGE = (
11842
"Eval module is not installed, please install via `pip install"
11943
" google-adk[eval]`."
@@ -227,8 +151,6 @@ async def run_evals(
227151
"""
228152
try:
229153
from ..evaluation.agent_evaluator import EvaluationGenerator
230-
from ..evaluation.response_evaluator import ResponseEvaluator
231-
from ..evaluation.trajectory_evaluator import TrajectoryEvaluator
232154
except ModuleNotFoundError as e:
233155
raise ModuleNotFoundError(MISSING_EVAL_DEPENDENCIES_MESSAGE) from e
234156

src/google/adk/cli/fast_api.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
# limitations under the License.
1414

1515

16+
from __future__ import annotations
17+
1618
import asyncio
1719
from contextlib import asynccontextmanager
1820
import importlib
@@ -59,6 +61,10 @@
5961
from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
6062
from ..evaluation.eval_case import EvalCase
6163
from ..evaluation.eval_case import SessionInput
64+
from ..evaluation.eval_metrics import EvalMetric
65+
from ..evaluation.eval_metrics import EvalMetricResult
66+
from ..evaluation.eval_metrics import EvalMetricResultPerInvocation
67+
from ..evaluation.eval_result import EvalSetResult
6268
from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
6369
from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
6470
from ..events.event import Event
@@ -69,10 +75,6 @@
6975
from ..sessions.session import Session
7076
from ..sessions.vertex_ai_session_service import VertexAiSessionService
7177
from .cli_eval import EVAL_SESSION_ID_PREFIX
72-
from .cli_eval import EvalMetric
73-
from .cli_eval import EvalMetricResult
74-
from .cli_eval import EvalMetricResultPerInvocation
75-
from .cli_eval import EvalSetResult
7678
from .cli_eval import EvalStatus
7779
from .utils import cleanup
7880
from .utils import common
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from typing import Optional
18+
19+
from pydantic import alias_generators
20+
from pydantic import BaseModel
21+
from pydantic import ConfigDict
22+
23+
from .eval_case import Invocation
24+
from .evaluator import EvalStatus
25+
26+
27+
class EvalMetric(BaseModel):
28+
"""A metric used to evaluate a particular aspect of an eval case."""
29+
30+
model_config = ConfigDict(
31+
alias_generator=alias_generators.to_camel,
32+
populate_by_name=True,
33+
)
34+
35+
model_config = ConfigDict(
36+
alias_generator=alias_generators.to_camel,
37+
populate_by_name=True,
38+
)
39+
40+
metric_name: str
41+
"""The name of the metric."""
42+
43+
threshold: float
44+
"""A threshold value. Each metric decides how to interpret this threshold."""
45+
46+
47+
class EvalMetricResult(EvalMetric):
48+
"""The actual computed score/value of a particular EvalMetric."""
49+
50+
model_config = ConfigDict(
51+
alias_generator=alias_generators.to_camel,
52+
populate_by_name=True,
53+
)
54+
model_config = ConfigDict(
55+
alias_generator=alias_generators.to_camel,
56+
populate_by_name=True,
57+
)
58+
score: Optional[float] = None
59+
eval_status: EvalStatus
60+
61+
62+
class EvalMetricResultPerInvocation(BaseModel):
63+
"""Eval metric results per invocation."""
64+
65+
actual_invocation: Invocation
66+
"""The actual invocation, usually obtained by inferencing the agent."""
67+
68+
expected_invocation: Invocation
69+
"""The expected invocation, usually the reference or golden invocation."""
70+
71+
eval_metric_results: list[EvalMetricResult] = []
72+
"""Eval results for each applicable metric."""
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from typing import Optional
18+
19+
from pydantic import alias_generators
20+
from pydantic import BaseModel
21+
from pydantic import ConfigDict
22+
from pydantic import Field
23+
24+
from ..sessions.session import Session
25+
from .eval_metrics import EvalMetric
26+
from .eval_metrics import EvalMetricResult
27+
from .eval_metrics import EvalMetricResultPerInvocation
28+
from .evaluator import EvalStatus
29+
30+
31+
class EvalCaseResult(BaseModel):
32+
"""Case level evaluation results."""
33+
34+
model_config = ConfigDict(
35+
alias_generator=alias_generators.to_camel,
36+
populate_by_name=True,
37+
)
38+
39+
eval_set_file: str = Field(
40+
deprecated=True,
41+
description="This field is deprecated, use eval_set_id instead.",
42+
)
43+
eval_set_id: str = ""
44+
"""The eval set id."""
45+
46+
eval_id: str = ""
47+
"""The eval case id."""
48+
49+
final_eval_status: EvalStatus
50+
"""Final eval status for this eval case."""
51+
52+
eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field(
53+
deprecated=True,
54+
description=(
55+
"This field is deprecated, use overall_eval_metric_results instead."
56+
),
57+
)
58+
59+
overall_eval_metric_results: list[EvalMetricResult]
60+
"""Overall result for each metric for the entire eval case."""
61+
62+
eval_metric_result_per_invocation: list[EvalMetricResultPerInvocation]
63+
"""Result for each metric on a per invocation basis."""
64+
65+
session_id: str
66+
"""Session id of the session generated as result of inferencing/scraping stage of the eval."""
67+
68+
session_details: Optional[Session] = None
69+
"""Session generated as result of inferencing/scraping stage of the eval."""
70+
71+
user_id: Optional[str] = None
72+
"""User id used during inferencing/scraping stage of the eval."""
73+
74+
75+
class EvalSetResult(BaseModel):
76+
"""Eval set level evaluation results."""
77+
78+
model_config = ConfigDict(
79+
alias_generator=alias_generators.to_camel,
80+
populate_by_name=True,
81+
)
82+
eval_set_result_id: str
83+
eval_set_result_name: str
84+
eval_set_id: str
85+
eval_case_results: list[EvalCaseResult] = Field(default_factory=list)
86+
creation_timestamp: float = 0.0

src/google/adk/evaluation/eval_set_results_manager.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from __future__ import annotations
16+
1517
from abc import ABC
1618
from abc import abstractmethod
1719

18-
from ..cli.cli_eval import EvalCaseResult
19-
from ..cli.cli_eval import EvalSetResult
20+
from .eval_result import EvalCaseResult
21+
from .eval_result import EvalSetResult
2022

2123

2224
class EvalSetResultsManager(ABC):

src/google/adk/evaluation/local_eval_set_results_manager.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,17 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from __future__ import annotations
16+
1517
import json
1618
import logging
1719
import os
1820
import time
1921

2022
from typing_extensions import override
2123

22-
from ..cli.cli_eval import EvalCaseResult
23-
from ..cli.cli_eval import EvalSetResult
24+
from .eval_result import EvalCaseResult
25+
from .eval_result import EvalSetResult
2426
from .eval_set_results_manager import EvalSetResultsManager
2527

2628
logger = logging.getLogger("google_adk." + __name__)

0 commit comments

Comments
 (0)