1
- import dspy
1
+ from dspy .predict .chain_of_thought import ChainOfThought
2
+ from dspy .primitives import Module
3
+ from dspy .signatures import InputField , OutputField , Signature
2
4
3
5
4
class SemanticRecallPrecision(Signature):
    """
    Compare a system's response to the ground truth to compute its recall and precision.
    If asked to reason, enumerate key ideas in each response, and whether they are present in the other response.
    """

    # NOTE(review): the class docstring above is presumably consumed by DSPy as
    # the prompt instructions, so its wording is kept verbatim -- confirm before
    # editing it.

    # Inputs: the question plus the two answers being compared.
    question: str = InputField()
    ground_truth: str = InputField()
    system_response: str = InputField()

    # Outputs: two fractions, each described to the model as "out of 1.0".
    recall: float = OutputField(desc="fraction (out of 1.0) of ground truth covered by the system response")
    precision: float = OutputField(desc="fraction (out of 1.0) of system response covered by the ground truth")
15
17
16
18
17
class DecompositionalSemanticRecallPrecision(Signature):
    """
    Compare a system's response to the ground truth to compute recall and precision of key ideas.
    You will first enumerate key ideas in each response, discuss their overlap, and then report recall and precision.
    """

    # NOTE(review): the class docstring above is presumably consumed by DSPy as
    # the prompt instructions, so its wording is kept verbatim -- confirm before
    # editing it.

    # Inputs: the question plus the two answers being compared.
    question: str = InputField()
    ground_truth: str = InputField()
    system_response: str = InputField()

    # Intermediate outputs, declared before the scores: key ideas per answer,
    # then a discussion of their overlap.
    ground_truth_key_ideas: str = OutputField(desc="enumeration of key ideas in the ground truth")
    system_response_key_ideas: str = OutputField(desc="enumeration of key ideas in the system response")
    discussion: str = OutputField(desc="discussion of the overlap between ground truth and system response")

    # Final outputs: two fractions, each described to the model as "out of 1.0".
    recall: float = OutputField(desc="fraction (out of 1.0) of ground truth covered by the system response")
    precision: float = OutputField(desc="fraction (out of 1.0) of system response covered by the ground truth")
31
33
32
34
33
35
def f1_score(precision, recall):
    """Return the F1 score (harmonic mean) of *precision* and *recall*.

    Both inputs are clamped to the closed interval [0.0, 1.0] before the
    score is computed. A NaN input is treated as 0.0.

    Args:
        precision: Numeric precision value; out-of-range values are clamped.
        recall: Numeric recall value; out-of-range values are clamped.

    Returns:
        ``2 * p * r / (p + r)`` for the clamped values, or 0.0 when both
        clamped values are zero.
    """
    import math  # function-scope import keeps this block self-contained

    def _clamp_unit(value):
        # Every ordering comparison against NaN is False, so the plain
        # ``max(0.0, min(1.0, nan))`` evaluates to 1.0 and would report an
        # unparseable score as a perfect one. Map NaN to 0.0 explicitly.
        if math.isnan(value):
            return 0.0
        return max(0.0, min(1.0, value))

    precision, recall = _clamp_unit(precision), _clamp_unit(recall)

    # Guard the division: with both values at zero the harmonic mean is
    # defined here as 0.0.
    if precision + recall == 0:
        return 0.0
    return 2 * (precision * recall) / (precision + recall)
36
38
37
39
38
- class SemanticF1 (dspy . Module ):
40
+ class SemanticF1 (Module ):
39
41
def __init__ (self , threshold = 0.66 , decompositional = False ):
40
42
self .threshold = threshold
41
43
42
44
if decompositional :
43
- self .module = dspy . ChainOfThought (DecompositionalSemanticRecallPrecision )
45
+ self .module = ChainOfThought (DecompositionalSemanticRecallPrecision )
44
46
else :
45
- self .module = dspy . ChainOfThought (SemanticRecallPrecision )
47
+ self .module = ChainOfThought (SemanticRecallPrecision )
46
48
47
49
def forward (self , example , pred , trace = None ):
48
50
scores = self .module (question = example .question , ground_truth = example .response , system_response = pred .response )
@@ -55,42 +57,42 @@ def forward(self, example, pred, trace=None):
55
57
###########
56
58
57
59
58
class AnswerCompleteness(Signature):
    """
    Estimate the completeness of a system's responses, against the ground truth.
    You will first enumerate key ideas in each response, discuss their overlap, and then report completeness.
    """

    # NOTE(review): the class docstring above is presumably consumed by DSPy as
    # the prompt instructions, so its wording is kept verbatim -- confirm before
    # editing it.

    # Inputs: the question plus the two answers being compared.
    question: str = InputField()
    ground_truth: str = InputField()
    system_response: str = InputField()

    # Intermediate outputs, declared before the score: key ideas per answer,
    # then a discussion of their overlap.
    ground_truth_key_ideas: str = OutputField(desc="enumeration of key ideas in the ground truth")
    system_response_key_ideas: str = OutputField(desc="enumeration of key ideas in the system response")
    discussion: str = OutputField(desc="discussion of the overlap between ground truth and system response")

    # Final output: a single fraction described to the model as "out of 1.0".
    completeness: float = OutputField(desc="fraction (out of 1.0) of ground truth covered by the system response")
71
73
72
74
73
75
74
class AnswerGroundedness(Signature):
    """
    Estimate the groundedness of a system's responses, against real retrieved documents written by people.
    You will first enumerate whatever non-trivial or check-worthy claims are made in the system response, and then
    discuss the extent to which some or all of them can be deduced from the retrieved context and basic commonsense.
    """

    # NOTE(review): the class docstring above is presumably consumed by DSPy as
    # the prompt instructions, so its wording is kept verbatim -- confirm before
    # editing it.

    # Inputs: the question, the retrieved context, and the answer under review.
    question: str = InputField()
    retrieved_context: str = InputField()
    system_response: str = InputField()

    # Intermediate outputs, declared before the score: the claims found in the
    # answer, then a discussion of how well the context supports them.
    system_response_claims: str = OutputField(desc="enumeration of non-trivial or check-worthy claims in the system response")
    discussion: str = OutputField(desc="discussion of how supported the claims are by the retrieved context")

    # Final output: a single fraction described to the model as "out of 1.0".
    groundedness: float = OutputField(desc="fraction (out of 1.0) of system response supported by the retrieved context")
87
89
88
90
89
- class CompleteAndGrounded (dspy . Module ):
91
+ class CompleteAndGrounded (Module ):
90
92
def __init__ (self , threshold = 0.66 ):
91
93
self .threshold = threshold
92
- self .completeness_module = dspy . ChainOfThought (AnswerCompleteness )
93
- self .groundedness_module = dspy . ChainOfThought (AnswerGroundedness )
94
+ self .completeness_module = ChainOfThought (AnswerCompleteness )
95
+ self .groundedness_module = ChainOfThought (AnswerGroundedness )
94
96
95
97
def forward (self , example , pred , trace = None ):
96
98
completeness = self .completeness_module (question = example .question , ground_truth = example .response , system_response = pred .response )
0 commit comments