@@ -26,22 +26,76 @@ describe('evaluateAnswer', () => {
26
26
const { response } = await evaluateAnswer (
27
27
'What is TypeScript?' ,
28
28
'TypeScript is a strongly typed programming language that builds on JavaScript.' ,
29
+ [ 'definitive' ] ,
29
30
tokenTracker
30
31
) ;
31
- expect ( response ) . toHaveProperty ( 'is_definitive' ) ;
32
- expect ( response ) . toHaveProperty ( 'reasoning' ) ;
32
+ expect ( response ) . toHaveProperty ( 'pass' ) ;
33
+ expect ( response ) . toHaveProperty ( 'think' ) ;
34
+ expect ( response . type ) . toBe ( 'definitive' ) ;
35
+ expect ( response . pass ) . toBe ( true ) ;
36
+ } ) ;
37
+
38
// Freshness criterion: an answer that presents Node.js 14.0.0 (April 2020) as
// the latest release is expected to be reported as outdated and to fail.
it('should evaluate answer freshness', async () => {
  const question = 'What is the latest version of Node.js?';
  const answer = 'The latest version of Node.js is 14.0.0, released in April 2020.';
  const tracker = new TokenTracker();

  const { response } = await evaluateAnswer(question, answer, ['freshness'], tracker);

  // Generic verdict fields shared by every evaluation type.
  expect(response).toHaveProperty('pass');
  expect(response).toHaveProperty('think');
  expect(response.type).toBe('freshness');

  // Freshness-specific details attached to the response.
  const analysis = response.freshness_analysis;
  expect(analysis).toBeDefined();
  expect(analysis?.likely_outdated).toBe(true);
  expect(analysis?.dates_mentioned).toContain('2020-04');
  expect(analysis?.current_time).toBeDefined();

  expect(response.pass).toBe(false);
});
55
+
56
// Plurality criterion: the question asks for three items but the answer names
// only one, so the evaluation is expected to fail with counts 3 vs. 1.
it('should evaluate answer plurality', async () => {
  const question = 'List three programming languages.';
  const answer = 'Python is a programming language.';
  const tracker = new TokenTracker();

  const { response } = await evaluateAnswer(question, answer, ['plurality'], tracker);

  // Generic verdict fields shared by every evaluation type.
  expect(response).toHaveProperty('pass');
  expect(response).toHaveProperty('think');
  expect(response.type).toBe('plurality');

  // Plurality-specific details attached to the response.
  const analysis = response.plurality_analysis;
  expect(analysis).toBeDefined();
  expect(analysis?.expects_multiple).toBe(true);
  expect(analysis?.provides_multiple).toBe(false);
  expect(analysis?.count_expected).toBe(3);
  expect(analysis?.count_provided).toBe(1);

  expect(response.pass).toBe(false);
});
74
+
75
// With all three criteria requested, a non-committal answer is expected to
// fail on 'definitive' (the first criterion listed) and to carry no freshness
// or plurality analysis on the response.
it('should evaluate in order and stop at first failure', async () => {
  const question = 'List the latest Node.js versions.';
  const answer = 'I am not sure about the Node.js versions.';
  const criteria = ['definitive', 'freshness', 'plurality'];
  const tracker = new TokenTracker();

  const { response } = await evaluateAnswer(question, answer, criteria, tracker);

  expect(response.type).toBe('definitive');
  expect(response.pass).toBe(false);

  // Analyses belonging to the later criteria must be absent.
  expect(response.freshness_analysis).toBeUndefined();
  expect(response.plurality_analysis).toBeUndefined();
});
34
88
35
89
// Verifies that running an evaluation reports usage to the tracker under the
// 'evaluator' label with a numeric token count.
it('should track token usage', async () => {
  const tracker = new TokenTracker();
  const trackUsageSpy = jest.spyOn(tracker, 'trackUsage');

  const question = 'What is TypeScript?';
  const answer = 'TypeScript is a strongly typed programming language that builds on JavaScript.';
  const criteria = ['definitive', 'freshness', 'plurality'];

  // The resolved value is not inspected; only the tracker call is asserted.
  await evaluateAnswer(question, answer, criteria, tracker);

  expect(trackUsageSpy).toHaveBeenCalledWith('evaluator', expect.any(Number));
});
46
100
} ) ;
47
101
} ) ;
0 commit comments