Skip to content

Commit be25629

Browse files
authored
Add schema validation metric (#1033)
* Add schema validation metric
* rebase package json
1 parent 6c04a7b commit be25629

File tree

10 files changed

+351
-72
lines changed

10 files changed

+351
-72
lines changed

apps/web/src/components/evaluations/ConfigurationForm.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ export default function ConfigurationForm<
4343
<SelectableSwitch
4444
selected={!(configuration.reverseScale ?? false)}
4545
name='reverseScale'
46-
label='Orientation'
46+
label='Scale orientation'
4747
trueLabel='Higher is better'
4848
falseLabel='Lower is better'
4949
description='Orientation of the metric scale when normalizing the score for internal operations and to display evaluation results'

apps/web/src/components/evaluations/rule/RegularExpression.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ function ConfigurationForm({
4040
<Input
4141
value={configuration.pattern ?? ''}
4242
name='pattern'
43-
label='Regex Pattern'
43+
label='Regex pattern'
4444
description='The regex pattern to match against'
4545
placeholder='.*pattern.*'
4646
onChange={(e) =>

apps/web/src/components/evaluations/rule/SchemaValidation.tsx

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import {
2+
EvaluationType,
3+
RuleEvaluationMetric,
4+
RuleEvaluationSchemaValidationSpecification,
5+
} from '@latitude-data/constants'
6+
import { IconName, Select, TextArea } from '@latitude-data/web-ui'
7+
import {
8+
ChartConfigurationArgs,
9+
ConfigurationFormProps,
10+
ResultBadgeProps,
11+
ResultPanelProps,
12+
ResultRowCellsProps,
13+
ResultRowHeadersProps,
14+
} from '../index'
15+
16+
// Local alias for the schema validation specification imported from
// '@latitude-data/constants'.
const specification = RuleEvaluationSchemaValidationSpecification

/**
 * Frontend specification of the schema validation rule metric: every field of
 * the imported specification plus the icon, the components and the chart
 * configuration declared in this file. The components are function
 * declarations, so they can be referenced here before their definitions.
 */
export default {
  ...specification,
  icon: 'clipboardCheck' as IconName,
  ConfigurationForm,
  ResultBadge,
  ResultRowHeaders,
  ResultRowCells,
  resultPanelTabs: [],
  ResultPanelMetadata,
  ResultPanelContent,
  chartConfiguration,
}
29+
30+
// Options for the schema format selector: one entry per value of the `format`
// enum in the specification's configuration schema. The label is the format
// name upper-cased with underscores replaced by spaces.
const FORMAT_OPTIONS = specification.configuration.shape.format.options.map(
  (format) => {
    const words = format.toUpperCase().split('_')
    return { label: words.join(' '), value: format }
  },
)
36+
37+
/**
 * Form fields for configuring a schema validation evaluation: a selector for
 * the schema format and a text area for the schema itself.
 *
 * Each change calls `setConfiguration` with a copy of the current
 * configuration in which only the edited field is replaced.
 */
function ConfigurationForm({
  configuration,
  setConfiguration,
  disabled,
}: ConfigurationFormProps<
  EvaluationType.Rule,
  RuleEvaluationMetric.SchemaValidation
>) {
  const { format, schema } = configuration

  // Once a format is selected, the text area label names it (e.g. "JSON schema").
  const schemaLabel = format
    ? `${format.toUpperCase().split('_').join(' ')} schema`
    : 'Schema'

  return (
    <>
      <Select
        value={format ?? ''}
        name='format'
        label='Schema format'
        description='The format of the schema'
        placeholder='Select a schema format'
        options={FORMAT_OPTIONS}
        onChange={(value) =>
          setConfiguration({ ...configuration, format: value })
        }
        disabled={disabled}
        required
      />
      <TextArea
        value={schema ?? ''}
        name='schema'
        label={schemaLabel}
        description='The schema to validate against'
        placeholder='{ "type": "object" }'
        onChange={(e) =>
          setConfiguration({ ...configuration, schema: e.target.value })
        }
        minRows={3}
        disabled={disabled}
        required
      />
    </>
  )
}
80+
81+
/**
 * Badge text for a schema validation result: 'Valid' when the score is
 * exactly 1, 'Invalid' for any other score.
 */
function ResultBadge({
  result,
}: ResultBadgeProps<
  EvaluationType.Rule,
  RuleEvaluationMetric.SchemaValidation
>) {
  const label = result.score === 1 ? 'Valid' : 'Invalid'
  return <>{label}</>
}
89+
90+
/**
 * Extra result table headers for this metric. Renders an empty fragment, so
 * schema validation adds no columns of its own.
 */
function ResultRowHeaders(
  _props: ResultRowHeadersProps<
    EvaluationType.Rule,
    RuleEvaluationMetric.SchemaValidation
  >,
) {
  return <></>
}
98+
99+
/**
 * Extra result table cells for this metric. Renders an empty fragment, the
 * same as the headers component in this file.
 */
function ResultRowCells(
  _props: ResultRowCellsProps<
    EvaluationType.Rule,
    RuleEvaluationMetric.SchemaValidation
  >,
) {
  return <></>
}
107+
108+
/**
 * Metric-specific metadata for the result panel. Renders an empty fragment:
 * schema validation contributes no metadata entries of its own.
 */
function ResultPanelMetadata(
  _props: ResultPanelProps<
    EvaluationType.Rule,
    RuleEvaluationMetric.SchemaValidation
  >,
) {
  return <></>
}
116+
117+
/**
 * Metric-specific content for the result panel. Renders an empty fragment:
 * schema validation contributes no panel content of its own.
 */
function ResultPanelContent(
  _props: ResultPanelProps<
    EvaluationType.Rule,
    RuleEvaluationMetric.SchemaValidation
  >,
) {
  return <></>
}
125+
126+
/**
 * Chart settings for schema validation results, expressed on a 0–100 scale
 * with a single threshold at 50.
 *
 * @param _args - Chart configuration arguments; not used by this metric.
 * @returns The axis bounds, the threshold, a `scale` function that multiplies
 * a point by 100 and clamps it to [0, 100], and a `format` function that
 * renders a point as a whole-number percentage (with a " valid" suffix unless
 * `short` is set).
 */
function chartConfiguration(
  _args: ChartConfigurationArgs<
    EvaluationType.Rule,
    RuleEvaluationMetric.SchemaValidation
  >,
) {
  const scale = (point: number) => {
    const percentage = point * 100
    return Math.min(Math.max(percentage, 0), 100)
  }

  const format = (point: number, short?: boolean) => {
    const percentage = `${point.toFixed(0)}%`
    return short ? percentage : `${percentage} valid`
  }

  return {
    min: 0,
    max: 100,
    thresholds: [50] as const,
    scale,
    format,
  }
}

apps/web/src/components/evaluations/rule/index.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@ import {
1515
} from '../index'
1616
import RuleEvaluationExactMatchSpecification from './ExactMatch'
1717
import RuleEvaluationRegularExpressionSpecification from './RegularExpression'
18+
import RuleEvaluationSchemaValidationSpecification from './SchemaValidation'
1819

1920
// prettier-ignore
2021
const METRICS: {
2122
[M in RuleEvaluationMetric]: EvaluationMetricFrontendSpecification<EvaluationType.Rule, M>
2223
} = {
2324
[RuleEvaluationMetric.ExactMatch]: RuleEvaluationExactMatchSpecification,
2425
[RuleEvaluationMetric.RegularExpression]: RuleEvaluationRegularExpressionSpecification,
26+
[RuleEvaluationMetric.SchemaValidation]: RuleEvaluationSchemaValidationSpecification,
2527
[RuleEvaluationMetric.LengthCount]: undefined as any, // TODO: Implement
2628
[RuleEvaluationMetric.LexicalOverlap]: undefined as any, // TODO: Implement
2729
[RuleEvaluationMetric.SemanticSimilarity]: undefined as any, // TODO: Implement

packages/constants/src/evaluations/rule.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,39 @@ export type RuleEvaluationRegularExpressionResultError = z.infer<
7373
typeof RuleEvaluationRegularExpressionSpecification.resultError
7474
>
7575

76+
// SCHEMA VALIDATION
77+
78+
// Configuration: the base rule configuration extended with the schema format
// (the enum currently lists only 'json') and the schema as a string.
const ruleEvaluationSchemaValidationConfiguration =
  ruleEvaluationConfiguration.extend({
    format: z.enum(['json']),
    schema: z.string(),
  })
export type RuleEvaluationSchemaValidationConfiguration = z.infer<
  typeof ruleEvaluationSchemaValidationConfiguration
>

// Result metadata: the base rule result metadata with `configuration` set to
// the schema validation configuration schema above.
const ruleEvaluationSchemaValidationResultMetadata =
  ruleEvaluationResultMetadata.extend({
    configuration: ruleEvaluationSchemaValidationConfiguration,
  })
export type RuleEvaluationSchemaValidationResultMetadata = z.infer<
  typeof ruleEvaluationSchemaValidationResultMetadata
>

// Result error: the base rule result error with no additional fields.
const ruleEvaluationSchemaValidationResultError =
  ruleEvaluationResultError.extend({})
export type RuleEvaluationSchemaValidationResultError = z.infer<
  typeof ruleEvaluationSchemaValidationResultError
>

/**
 * Specification of the schema validation rule metric: display name and
 * description, the zod schemas for its configuration, result metadata and
 * result error, and its capability flags.
 */
export const RuleEvaluationSchemaValidationSpecification = {
  name: 'Schema Validation',
  description: 'Checks if the response follows the schema',
  configuration: ruleEvaluationSchemaValidationConfiguration,
  resultMetadata: ruleEvaluationSchemaValidationResultMetadata,
  resultError: ruleEvaluationSchemaValidationResultError,
  requiresExpectedOutput: false,
  supportsLiveEvaluation: true,
  supportsBatchEvaluation: true,
}
108+
76109
// LENGTH COUNT
77110

78111
const ruleEvaluationLengthCountConfiguration =
@@ -188,6 +221,7 @@ export type RuleEvaluationSemanticSimilarityResultError = z.infer<
188221
export enum RuleEvaluationMetric {
189222
ExactMatch = 'exact_match',
190223
RegularExpression = 'regular_expression',
224+
SchemaValidation = 'schema_validation',
191225
LengthCount = 'length_count',
192226
LexicalOverlap = 'lexical_overlap',
193227
SemanticSimilarity = 'semantic_similarity',
@@ -197,6 +231,7 @@ export enum RuleEvaluationMetric {
197231
export type RuleEvaluationConfiguration<M extends RuleEvaluationMetric = RuleEvaluationMetric> =
198232
M extends RuleEvaluationMetric.ExactMatch ? RuleEvaluationExactMatchConfiguration :
199233
M extends RuleEvaluationMetric.RegularExpression ? RuleEvaluationRegularExpressionConfiguration :
234+
M extends RuleEvaluationMetric.SchemaValidation ? RuleEvaluationSchemaValidationConfiguration :
200235
M extends RuleEvaluationMetric.LengthCount ? RuleEvaluationLengthCountConfiguration :
201236
M extends RuleEvaluationMetric.LexicalOverlap ? RuleEvaluationLexicalOverlapConfiguration :
202237
M extends RuleEvaluationMetric.SemanticSimilarity ? RuleEvaluationSemanticSimilarityConfiguration :
@@ -206,6 +241,7 @@ export type RuleEvaluationConfiguration<M extends RuleEvaluationMetric = RuleEva
206241
export type RuleEvaluationResultMetadata<M extends RuleEvaluationMetric = RuleEvaluationMetric> =
207242
M extends RuleEvaluationMetric.ExactMatch ? RuleEvaluationExactMatchResultMetadata :
208243
M extends RuleEvaluationMetric.RegularExpression ? RuleEvaluationRegularExpressionResultMetadata :
244+
M extends RuleEvaluationMetric.SchemaValidation ? RuleEvaluationSchemaValidationResultMetadata :
209245
M extends RuleEvaluationMetric.LengthCount ? RuleEvaluationLengthCountResultMetadata :
210246
M extends RuleEvaluationMetric.LexicalOverlap ? RuleEvaluationLexicalOverlapResultMetadata :
211247
M extends RuleEvaluationMetric.SemanticSimilarity ? RuleEvaluationSemanticSimilarityResultMetadata :
@@ -215,6 +251,7 @@ export type RuleEvaluationResultMetadata<M extends RuleEvaluationMetric = RuleEv
215251
export type RuleEvaluationResultError<M extends RuleEvaluationMetric = RuleEvaluationMetric> =
216252
M extends RuleEvaluationMetric.ExactMatch ? RuleEvaluationExactMatchResultError :
217253
M extends RuleEvaluationMetric.RegularExpression ? RuleEvaluationRegularExpressionResultError :
254+
M extends RuleEvaluationMetric.SchemaValidation ? RuleEvaluationSchemaValidationResultError :
218255
M extends RuleEvaluationMetric.LengthCount ? RuleEvaluationLengthCountResultError :
219256
M extends RuleEvaluationMetric.LexicalOverlap ? RuleEvaluationLexicalOverlapResultError :
220257
M extends RuleEvaluationMetric.SemanticSimilarity ? RuleEvaluationSemanticSimilarityResultError :
@@ -230,6 +267,7 @@ export const RuleEvaluationSpecification = {
230267
metrics: {
231268
[RuleEvaluationMetric.ExactMatch]: RuleEvaluationExactMatchSpecification,
232269
[RuleEvaluationMetric.RegularExpression]: RuleEvaluationRegularExpressionSpecification,
270+
[RuleEvaluationMetric.SchemaValidation]: RuleEvaluationSchemaValidationSpecification,
233271
[RuleEvaluationMetric.LengthCount]: RuleEvaluationLengthCountSpecification,
234272
[RuleEvaluationMetric.LexicalOverlap]: RuleEvaluationLexicalOverlapSpecification,
235273
[RuleEvaluationMetric.SemanticSimilarity]: RuleEvaluationSemanticSimilaritySpecification,

packages/core/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@
161161
"@codesandbox/sdk": "^0.6.2",
162162
"@modelcontextprotocol/sdk": "^1.6.0",
163163
"@tavily/core": "^0.3.1",
164+
"ajv": "^8.17.1",
164165
"date-fns": "^3.6.0",
165166
"diff-match-patch": "^1.0.5",
166167
"js-yaml": "^4.1.0",

packages/core/src/services/evaluationsV2/rule/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,15 @@ import {
1313
} from '../shared'
1414
import RuleEvaluationExactMatchSpecification from './exactMatch'
1515
import RuleEvaluationRegularExpressionSpecification from './regularExpression'
16+
import RuleEvaluationSchemaValidationSpecification from './schemaValidation'
1617

1718
// prettier-ignore
1819
const METRICS: {
1920
[M in RuleEvaluationMetric]: EvaluationMetricBackendSpecification<EvaluationType.Rule, M>
2021
} = {
2122
[RuleEvaluationMetric.ExactMatch]: RuleEvaluationExactMatchSpecification,
2223
[RuleEvaluationMetric.RegularExpression]: RuleEvaluationRegularExpressionSpecification,
24+
[RuleEvaluationMetric.SchemaValidation]: RuleEvaluationSchemaValidationSpecification,
2325
[RuleEvaluationMetric.LengthCount]: undefined as any, // TODO: Implement
2426
[RuleEvaluationMetric.LexicalOverlap]: undefined as any, // TODO: Implement
2527
[RuleEvaluationMetric.SemanticSimilarity]: undefined as any, // TODO: Implement

0 commit comments

Comments
 (0)