@@ -15,9 +15,21 @@ def mock_project() -> Generator[Mock, None, None]:
15
15
is_bad_response = True ,
16
16
expert_answer = None ,
17
17
eval_scores = {
18
- "response_helpfulness" : EvalScores (score = 0.95 , failed = False ),
19
- "trustworthiness" : EvalScores (score = 0.5 , failed = True ),
18
+ "response_helpfulness" : EvalScores (
19
+ score = 0.95 ,
20
+ triggered = False ,
21
+ triggered_escalation = False ,
22
+ triggered_guardrail = False ,
23
+ ),
24
+ "trustworthiness" : EvalScores (
25
+ score = 0.5 ,
26
+ triggered = True ,
27
+ triggered_escalation = True ,
28
+ triggered_guardrail = True ,
29
+ ),
20
30
},
31
+ escalated_to_sme = True ,
32
+ should_guardrail = True ,
21
33
)
22
34
mock .from_access_key .return_value = mock_obj
23
35
yield mock
@@ -31,9 +43,21 @@ def mock_project_with_custom_thresholds() -> Generator[Mock, None, None]:
31
43
is_bad_response = False ,
32
44
expert_answer = None ,
33
45
eval_scores = {
34
- "response_helpfulness" : EvalScores (score = 0.95 , failed = False ),
35
- "trustworthiness" : EvalScores (score = 0.5 , failed = False ),
46
+ "response_helpfulness" : EvalScores (
47
+ score = 0.95 ,
48
+ triggered = False ,
49
+ triggered_escalation = False ,
50
+ triggered_guardrail = False ,
51
+ ),
52
+ "trustworthiness" : EvalScores (
53
+ score = 0.5 ,
54
+ triggered = False ,
55
+ triggered_escalation = False ,
56
+ triggered_guardrail = False ,
57
+ ),
36
58
},
59
+ escalated_to_sme = False ,
60
+ should_guardrail = False ,
37
61
)
38
62
mock .from_access_key .return_value = mock_obj
39
63
yield mock
@@ -66,9 +90,21 @@ def test_validate_expert_answer(self, mock_project: Mock) -> None:
66
90
is_bad_response = True ,
67
91
expert_answer = "expert answer" ,
68
92
eval_scores = {
69
- "response_helpfulness" : EvalScores (score = 0.95 , failed = False ),
70
- "trustworthiness" : EvalScores (score = 0.5 , failed = True ),
93
+ "response_helpfulness" : EvalScores (
94
+ score = 0.95 ,
95
+ triggered = False ,
96
+ triggered_escalation = False ,
97
+ triggered_guardrail = False ,
98
+ ),
99
+ "trustworthiness" : EvalScores (
100
+ score = 0.5 ,
101
+ triggered = True ,
102
+ triggered_escalation = True ,
103
+ triggered_guardrail = True ,
104
+ ),
71
105
},
106
+ escalated_to_sme = True ,
107
+ should_guardrail = True ,
72
108
)
73
109
# Basically any response will be flagged as untrustworthy
74
110
result = validator .validate (query = "test query" , context = "test context" , response = "test response" )
0 commit comments