Skip to content

Commit 117e631

Browse files
authored
Evaluations v1.5 frontend second part (#1029)
* Evaluations v1.5 more frontend * wip * wip * wip * wip * wip
1 parent d2a66eb commit 117e631

File tree

48 files changed

+1210
-127
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+1210
-127
lines changed

apps/web/src/app/(private)/evaluations/_components/CreateEvaluationModal/index.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { useCurrentDocument } from '$/app/providers/DocumentProvider'
44
import EvaluationV2Form from '$/components/evaluations/EvaluationV2Form'
55
import { useFeatureFlag } from '$/components/Providers/FeatureFlags'
66
import useEvaluations from '$/stores/evaluations'
7-
import useEvaluationsV2 from '$/stores/evaluationsV2'
7+
import { useEvaluationsV2 } from '$/stores/evaluationsV2'
88
import {
99
EvaluationMetadataType,
1010
EvaluationOptions,

apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/_components/DocumentEditor/Editor/Playground/DocumentEvaluations/index.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import {
55
import useConnectedEvaluations from '$/stores/connectedEvaluations'
66
import useEvaluationResultsByDocumentLogs from '$/stores/evaluationResultsByDocumentLogs'
77
import useEvaluationResultsV2ByDocumentLogs from '$/stores/evaluationResultsV2/byDocumentLogs'
8-
import useEvaluationsV2 from '$/stores/evaluationsV2'
8+
import { useEvaluationsV2 } from '$/stores/evaluationsV2'
99
import {
1010
ConnectedEvaluation,
1111
DocumentVersion,

apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/evaluations-v2/[evaluationUuid]/_components/EvaluationActions.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { useCurrentDocument } from '$/app/providers/DocumentProvider'
22
import { useCurrentEvaluationV2 } from '$/app/providers/EvaluationV2Provider'
33
import { EVALUATION_SPECIFICATIONS } from '$/components/evaluations'
44
import EvaluationV2Form from '$/components/evaluations/EvaluationV2Form'
5-
import useEvaluationsV2 from '$/stores/evaluationsV2'
5+
import { useEvaluationsV2 } from '$/stores/evaluationsV2'
66
import {
77
EvaluationMetric,
88
EvaluationOptions,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import { Commit, EvaluationResultsV2Search } from '@latitude-data/core/browser'
2+
import { DatePickerRange } from '@latitude-data/web-ui'
3+
import { endOfDay, startOfDay } from 'date-fns'
4+
import { isEqual } from 'lodash-es'
5+
import { ComponentProps, useMemo } from 'react'
6+
import { CommitFilter } from '../../../logs/_components/Filters/CommitFilter'
7+
8+
/**
 * Filter bar for evaluation results: a date range picker next to a commit
 * (version) filter, laid out in a single flex row.
 *
 * Both controls write into `search.filters` through `setSearch`, spreading the
 * current `search` and `search.filters` so unrelated fields are preserved:
 *  - the date picker sets `filters.createdAt`
 *  - the commit filter sets `filters.commitIds`
 *
 * The commit filter reports `isDefault` when no `commitIds` filter is set or
 * when it deep-equals the default selection; `reset` writes the default
 * selection back explicitly rather than removing the key.
 *
 * @param commits - Commits keyed by number; their ids form the default selection.
 * @param search - Current search state that the controls read from.
 * @param setSearch - Called with the next search state on every change.
 * @param isLoading - When true, both controls are rendered disabled.
 */
export function EvaluationFilters({
9+
commits,
10+
search,
11+
setSearch,
12+
isLoading,
13+
}: {
14+
commits: Record<number, Commit>
15+
search: EvaluationResultsV2Search
16+
setSearch: (search: EvaluationResultsV2Search) => void
17+
isLoading: boolean
18+
}) {
19+
// Default selection is the id of every commit in `commits`; recomputed only
// when `commits` changes identity.
const defaultSelectedCommits = useMemo(
20+
() => Object.values(commits).map((c) => c.id),
21+
[commits],
22+
)
23+
24+
return (
25+
<div className='flex items-center gap-2'>
26+
<DatePickerRange
27+
showPresets
28+
initialRange={
29+
search.filters?.createdAt as ComponentProps<
30+
typeof DatePickerRange
31+
>['initialRange']
32+
}
33+
onCloseChange={(value) => {
34+
// Widen the picked bounds to the start of the first day and the end of
// the last day before storing them.
// NOTE(review): this assigns onto the `value` object received from
// DatePickerRange instead of copying it. If the picker keeps a reference
// to that object, this mutates its state in place; confirm, or build a new
// range object here.
if (value?.from) value.from = startOfDay(value.from)
35+
if (value?.to) value.to = endOfDay(value.to)
36+
37+
// `value` is stored as-is, including when it is nullish (the optional
// chaining above suggests it can be; TODO confirm that means "cleared").
setSearch({
38+
...search,
39+
filters: {
40+
...(search.filters ?? {}),
41+
createdAt: value,
42+
},
43+
})
44+
}}
45+
disabled={isLoading}
46+
/>
47+
<CommitFilter
48+
selectedCommitsIds={search.filters?.commitIds ?? defaultSelectedCommits}
49+
onSelectCommits={(value) =>
50+
setSearch({
51+
...search,
52+
filters: {
53+
...(search.filters ?? {}),
54+
commitIds: value,
55+
},
56+
})
57+
}
58+
isDefault={
59+
!search.filters?.commitIds ||
60+
isEqual(search.filters?.commitIds, defaultSelectedCommits)
61+
}
62+
reset={() =>
63+
setSearch({
64+
...search,
65+
filters: {
66+
...(search.filters ?? {}),
67+
commitIds: defaultSelectedCommits,
68+
},
69+
})
70+
}
71+
disabled={isLoading}
72+
/>
73+
</div>
74+
)
75+
}

apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/evaluations-v2/[evaluationUuid]/_components/EvaluationPage.tsx

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import {
99
} from '$/components/Providers/WebsocketsProvider/useSockets'
1010
import { useCommits } from '$/stores/commitsStore'
1111
import { useEvaluationResultsV2 } from '$/stores/evaluationResultsV2'
12+
import { useEvaluationV2Stats } from '$/stores/evaluationsV2'
1213
import {
1314
Commit,
1415
EvaluationMetric,
@@ -17,12 +18,14 @@ import {
1718
EvaluationResultV2,
1819
EvaluationType,
1920
EvaluationV2,
21+
EvaluationV2Stats,
2022
} from '@latitude-data/core/browser'
2123
import {
2224
Badge,
2325
Breadcrumb,
2426
BreadcrumbItem,
2527
ClickToCopyUuid,
28+
Icon,
2629
TableWithHeader,
2730
Text,
2831
Tooltip,
@@ -33,6 +36,7 @@ import { useRouter } from 'next/navigation'
3336
import { useCallback, useEffect, useMemo, useState } from 'react'
3437
import { useDebounce } from 'use-debounce'
3538
import { EvaluationActions } from './EvaluationActions'
39+
import { EvaluationFilters } from './EvaluationFilters'
3640
import { EvaluationResultsTable } from './EvaluationResultsTable'
3741
import { EvaluationStats } from './EvaluationStats'
3842

@@ -90,10 +94,12 @@ export function EvaluationPage<
9094
>({
9195
results: serverResults,
9296
selectedResult: serverSelectedResult,
97+
stats: serverStats,
9398
search: serverSearch,
9499
}: {
95100
results: EvaluationResultV2<T, M>[]
96101
selectedResult?: EvaluationResultV2<T, M>
102+
stats?: EvaluationV2Stats
97103
search: EvaluationResultsV2Search
98104
}) {
99105
const { project } = useCurrentProject()
@@ -134,10 +140,15 @@ export function EvaluationPage<
134140
useEvaluationResultsV2Socket({ evaluation, commits, mutate })
135141
const [selectedResult, setSelectedResult] = useState(serverSelectedResult)
136142

137-
const isLoading = isLoadingResults || isLoadingCommits
143+
const { data: stats, isLoading: isLoadingStats } = useEvaluationV2Stats<T, M>(
144+
{ project, commit, document, evaluation, search: debouncedSearch },
145+
{ fallbackData: serverStats },
146+
)
147+
148+
const isLoading = isLoadingResults || isLoadingStats || isLoadingCommits
138149

139150
return (
140-
<div className='flex flex-grow min-h-0 flex-col w-full gap-6 p-6'>
151+
<div className='flex flex-grow min-h-0 flex-col w-full gap-4 p-6'>
141152
<TableWithHeader
142153
title={
143154
<Breadcrumb>
@@ -176,7 +187,28 @@ export function EvaluationPage<
176187
}
177188
actions={<EvaluationActions />}
178189
/>
179-
<EvaluationStats />
190+
<div className='w-full flex items-center justify-between'>
191+
<span className='flex items-center gap-x-2'>
192+
<Text.H4 color='foregroundMuted'>
193+
A {evaluation.configuration.reverseScale ? 'lower' : 'higher'} score
194+
is better
195+
</Text.H4>
196+
{evaluation.configuration.reverseScale ? (
197+
<Icon name='arrowDown' color='foregroundMuted' />
198+
) : (
199+
<Icon name='arrowUp' color='foregroundMuted' />
200+
)}
201+
</span>
202+
<EvaluationFilters
203+
commits={commits}
204+
search={search}
205+
setSearch={setSearch}
206+
isLoading={isLoading}
207+
/>
208+
</div>
209+
<div className='min-h-64 h-64 max-h-64'>
210+
<EvaluationStats stats={stats} isLoading={isLoading} />
211+
</div>
180212
<EvaluationResultsTable
181213
results={results}
182214
selectedResult={selectedResult}

apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/evaluations-v2/[evaluationUuid]/_components/EvaluationResultsTable.tsx

Lines changed: 2 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,8 @@ import {
88
EvaluationResultV2,
99
EvaluationType,
1010
} from '@latitude-data/core/browser'
11-
import { cn, Icon, TableBlankSlate, Text, Tooltip } from '@latitude-data/web-ui'
12-
import { isEqual } from 'lodash-es'
13-
import { useMemo, useRef } from 'react'
14-
import { CommitFilter } from '../../../logs/_components/Filters/CommitFilter'
11+
import { cn, TableBlankSlate } from '@latitude-data/web-ui'
12+
import { useRef } from 'react'
1513
import { EvaluationBatchIndicator } from './EvaluationBatchIndicator'
1614
import { EvaluationResultsTableBody } from './EvaluationResultsTableBody'
1715

@@ -24,7 +22,6 @@ export function EvaluationResultsTable<
2422
setSelectedResult,
2523
commits,
2624
search,
27-
setSearch,
2825
isLoading,
2926
}: {
3027
results: EvaluationResultV2<T, M>[]
@@ -44,64 +41,8 @@ export function EvaluationResultsTable<
4441
rowIds: results.filter((r) => !r.error).map((r) => r.uuid),
4542
})
4643

47-
const defaultSelectedCommits = useMemo(
48-
() =>
49-
Object.values(commits)
50-
.filter((commit) => !!commit.mergedAt)
51-
.map((commit) => commit.id),
52-
[commits],
53-
)
54-
5544
return (
5645
<div className='flex flex-col gap-4 flex-grow min-h-0'>
57-
<div className='w-full flex items-center justify-between'>
58-
<span className='flex items-center gap-2'>
59-
<Text.H4>Results</Text.H4>
60-
<Tooltip
61-
asChild
62-
trigger={
63-
<span>
64-
<Icon name='info' color='foreground' />
65-
</span>
66-
}
67-
align='start'
68-
side='top'
69-
>
70-
Results from the filtered versions of this evaluation and document
71-
</Tooltip>
72-
</span>
73-
<div className='flex items-center gap-2'>
74-
<div className='flex flex-row gap-2 items-center'>
75-
<CommitFilter
76-
selectedCommitsIds={
77-
search.filters?.commitIds ?? defaultSelectedCommits
78-
}
79-
onSelectCommits={(value) =>
80-
setSearch({
81-
...search,
82-
filters: {
83-
...(search.filters ?? {}),
84-
commitIds: value,
85-
},
86-
})
87-
}
88-
isDefault={
89-
!search.filters?.commitIds ||
90-
isEqual(search.filters?.commitIds, defaultSelectedCommits)
91-
}
92-
reset={() =>
93-
setSearch({
94-
...search,
95-
filters: {
96-
...(search.filters ?? {}),
97-
commitIds: defaultSelectedCommits,
98-
},
99-
})
100-
}
101-
/>
102-
</div>
103-
</div>
104-
</div>
10546
<EvaluationBatchIndicator />
10647
<div
10748
className={cn('gap-x-4 grid pb-6', {

apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/evaluations-v2/[evaluationUuid]/_components/EvaluationResultsTableBody.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,8 @@ function EvaluationResultsTableRow<
9393

9494
// Curried label builder: takes the number of selected rows and returns a
// function that maps the total count to a display label. A `selected` of 0 is
// falsy, so the plain "<count> results" form is used in that case.
// This diff hunk shortens the wording from "evaluation results" to "results".
const countLabel = (selected: number) => (count: number) => {
9595
return selected
96-
? `${selected} of ${count} evaluation results selected`
97-
: `${count} evaluation results`
96+
? `${selected} of ${count} results selected`
97+
: `${count} results`
9898
}
9999

100100
export function EvaluationResultsTableBody<
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,53 @@
11
import { useCurrentEvaluationV2 } from '$/app/providers/EvaluationV2Provider'
2-
import { EVALUATION_SPECIFICATIONS } from '$/components/evaluations'
3-
import { EvaluationMetric, EvaluationType } from '@latitude-data/constants'
2+
import {
3+
EvaluationMetric,
4+
EvaluationType,
5+
EvaluationV2Stats,
6+
} from '@latitude-data/core/browser'
7+
import { cn } from '@latitude-data/web-ui'
8+
import AverageScoreChart from './charts/AverageScore'
9+
import DailyOverviewChart from './charts/DailyOverview'
10+
import TotalCostChart from './charts/TotalCost'
11+
import TotalResultsChart from './charts/TotalResults'
12+
import TotalTokensChart from './charts/TotalTokens'
13+
import VersionOverviewChart from './charts/VersionOverview'
414

515
/**
 * Stats panel for the current evaluation (read from `useCurrentEvaluationV2`).
 *
 * Describes the added (+) side of this diff; the removed (-) lines are the
 * previous placeholder that rendered only a heading.
 *
 * Layout: a grid that becomes three columns at the `xl` breakpoint. The first
 * two columns hold the daily and per-version overview charts; the last column
 * holds two rows: total results (plus total cost) and average score (plus
 * total tokens). The cost and tokens charts are rendered only when
 * `evaluation.type === EvaluationType.Llm`, and each row switches between one
 * and two grid columns accordingly.
 *
 * @param stats - Optional stats payload, forwarded unchanged to every chart.
 * @param isLoading - Loading flag, forwarded unchanged to every chart.
 */
export function EvaluationStats<
616
T extends EvaluationType = EvaluationType,
717
M extends EvaluationMetric<T> = EvaluationMetric<T>,
8-
>() {
18+
>({ stats, isLoading }: { stats?: EvaluationV2Stats; isLoading: boolean }) {
919
const { evaluation } = useCurrentEvaluationV2<T, M>()
1020

11-
const typeSpecification = EVALUATION_SPECIFICATIONS[evaluation.type]
12-
const metricSpecification = typeSpecification.metrics[evaluation.metric]
13-
14-
// TODO
15-
metricSpecification
16-
17-
return <h1>Evaluation Stats</h1>
21+
return (
22+
<div className='h-full w-full grid xl:grid-cols-3 gap-4 flex-grow'>
23+
<div className='h-full w-full col-span-2 grid grid-cols-2 gap-4'>
24+
<DailyOverviewChart stats={stats} isLoading={isLoading} />
25+
<VersionOverviewChart stats={stats} isLoading={isLoading} />
26+
</div>
27+
<div className='h-full w-full col-span-1 grid grid-rows-2 gap-4'>
28+
<div
29+
className={cn('grid gap-4', {
30+
'grid-cols-1': evaluation.type !== EvaluationType.Llm,
31+
'grid-cols-2': evaluation.type === EvaluationType.Llm,
32+
})}
33+
>
34+
<TotalResultsChart stats={stats} isLoading={isLoading} />
35+
{evaluation.type === EvaluationType.Llm && (
36+
<TotalCostChart stats={stats} isLoading={isLoading} />
37+
)}
38+
</div>
39+
<div
40+
className={cn('grid gap-4', {
41+
'grid-cols-1': evaluation.type !== EvaluationType.Llm,
42+
'grid-cols-2': evaluation.type === EvaluationType.Llm,
43+
})}
44+
>
45+
<AverageScoreChart stats={stats} isLoading={isLoading} />
46+
{evaluation.type === EvaluationType.Llm && (
47+
<TotalTokensChart stats={stats} isLoading={isLoading} />
48+
)}
49+
</div>
50+
</div>
51+
</div>
52+
)
1853
}

0 commit comments

Comments
 (0)