Skip to content

Commit e988c06

Browse files
nsarrazin and antoniora authored
Add metrics for models, tools, websearch (#1186)
* Add custom metrics for messages and conversations
* lint
* Add metrics for: model health, tools, websearch
* Add time window & age buckets to summaries
* Increase max age for tool use duration

---------

Co-authored-by: antoniora <antonio.ramos@adyen.com>
1 parent 309f226 commit e988c06

File tree

7 files changed

+209
-1
lines changed

7 files changed

+209
-1
lines changed

src/lib/server/metrics.ts

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,39 @@
1-
import { collectDefaultMetrics, Registry } from "prom-client";
1+
import { collectDefaultMetrics, Registry, Counter, Summary } from "prom-client";
22
import express from "express";
33
import { logger } from "$lib/server/logger";
44
import { env } from "$env/dynamic/private";
5+
import type { Model } from "$lib/types/Model";
6+
import type { Tool } from "$lib/types/Tool";
7+
8+
interface Metrics {
9+
model: {
10+
conversationsTotal: Counter<Model["id"]>;
11+
messagesTotal: Counter<Model["id"]>;
12+
tokenCountTotal: Counter<Model["id"]>;
13+
timePerOutputToken: Summary<Model["id"]>;
14+
timeToFirstToken: Summary<Model["id"]>;
15+
latency: Summary<Model["id"]>;
16+
};
17+
18+
webSearch: {
19+
requestCount: Counter;
20+
pageFetchCount: Counter;
21+
pageFetchCountError: Counter;
22+
pageFetchDuration: Summary;
23+
embeddingDuration: Summary;
24+
};
25+
26+
tool: {
27+
toolUseCount: Counter<Tool["name"]>;
28+
toolUseCountError: Counter<Tool["name"]>;
29+
toolUseDuration: Summary<Tool["name"]>;
30+
timeToChooseTools: Summary;
31+
};
32+
}
533

634
export class MetricsServer {
735
private static instance: MetricsServer;
36+
private metrics: Metrics;
837

938
private constructor() {
1039
const app = express();
@@ -17,6 +46,114 @@ export class MetricsServer {
1746
const register = new Registry();
1847
collectDefaultMetrics({ register });
1948

49+
this.metrics = {
50+
model: {
51+
conversationsTotal: new Counter({
52+
name: "model_conversations_total",
53+
help: "Total number of conversations",
54+
labelNames: ["model"],
55+
registers: [register],
56+
}),
57+
messagesTotal: new Counter({
58+
name: "model_messages_total",
59+
help: "Total number of messages",
60+
labelNames: ["model"],
61+
registers: [register],
62+
}),
63+
tokenCountTotal: new Counter({
64+
name: "model_token_count_total",
65+
help: "Total number of tokens",
66+
labelNames: ["model"],
67+
registers: [register],
68+
}),
69+
timePerOutputToken: new Summary({
70+
name: "model_time_per_output_token_ms",
71+
help: "Time per output token in ms",
72+
labelNames: ["model"],
73+
registers: [register],
74+
maxAgeSeconds: 5 * 60,
75+
ageBuckets: 5,
76+
}),
77+
timeToFirstToken: new Summary({
78+
name: "model_time_to_first_token_ms",
79+
help: "Time to first token",
80+
labelNames: ["model"],
81+
registers: [register],
82+
maxAgeSeconds: 5 * 60,
83+
ageBuckets: 5,
84+
}),
85+
latency: new Summary({
86+
name: "model_latency_ms",
87+
help: "Total latency until end of answer",
88+
labelNames: ["model"],
89+
registers: [register],
90+
maxAgeSeconds: 5 * 60,
91+
ageBuckets: 5,
92+
}),
93+
},
94+
webSearch: {
95+
requestCount: new Counter({
96+
name: "web_search_request_count",
97+
help: "Total number of web search requests",
98+
registers: [register],
99+
}),
100+
pageFetchCount: new Counter({
101+
name: "web_search_page_fetch_count",
102+
help: "Total number of web search page fetches",
103+
registers: [register],
104+
}),
105+
pageFetchCountError: new Counter({
106+
name: "web_search_page_fetch_count_error",
107+
help: "Total number of web search page fetch errors",
108+
registers: [register],
109+
}),
110+
pageFetchDuration: new Summary({
111+
name: "web_search_page_fetch_duration_ms",
112+
help: "Web search page fetch duration",
113+
registers: [register],
114+
maxAgeSeconds: 5 * 60,
115+
ageBuckets: 5,
116+
}),
117+
embeddingDuration: new Summary({
118+
name: "web_search_embedding_duration_ms",
119+
help: "Web search embedding duration",
120+
registers: [register],
121+
maxAgeSeconds: 5 * 60,
122+
ageBuckets: 5,
123+
}),
124+
},
125+
tool: {
126+
toolUseCount: new Counter({
127+
name: "tool_use_count",
128+
help: "Total number of tool uses",
129+
labelNames: ["tool"],
130+
registers: [register],
131+
}),
132+
toolUseCountError: new Counter({
133+
name: "tool_use_count_error",
134+
help: "Total number of tool use errors",
135+
labelNames: ["tool"],
136+
registers: [register],
137+
}),
138+
toolUseDuration: new Summary({
139+
name: "tool_use_duration_ms",
140+
help: "Tool use duration",
141+
labelNames: ["tool"],
142+
registers: [register],
143+
maxAgeSeconds: 30 * 60, // longer duration since we use this to give feedback to the user
144+
ageBuckets: 5,
145+
}),
146+
timeToChooseTools: new Summary({
147+
name: "time_to_choose_tools_ms",
148+
help: "Time to choose tools",
149+
labelNames: ["model"],
150+
registers: [register],
151+
maxAgeSeconds: 5 * 60,
152+
ageBuckets: 5,
153+
}),
154+
},
155+
};
156+
20157
app.get("/metrics", (req, res) => {
21158
register.metrics().then((metrics) => {
22159
res.set("Content-Type", "text/plain");
@@ -40,4 +177,8 @@ export class MetricsServer {
40177

41178
return MetricsServer.instance;
42179
}
180+
181+
public static getMetrics(): Metrics {
182+
return MetricsServer.getInstance().metrics;
183+
}
43184
}

src/lib/server/textGeneration/tools.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import { logger } from "../logger";
1818
import { toolHasName } from "../tools/utils";
1919
import type { MessageFile } from "$lib/types/Message";
2020
import { mergeAsyncGenerators } from "$lib/utils/mergeAsyncGenerators";
21+
import { MetricsServer } from "../metrics";
2122

2223
function makeFilesPrompt(files: MessageFile[], fileMessageIndex: number): string {
2324
if (files.length === 0) {
@@ -62,6 +63,9 @@ async function* runTool(
6263
// Special case for directly_answer tool where we ignore
6364
if (toolHasName(directlyAnswer.name, tool)) return;
6465

66+
const startTime = Date.now();
67+
MetricsServer.getMetrics().tool.toolUseCount.inc({ tool: call.name });
68+
6569
yield {
6670
type: MessageUpdateType.Tool,
6771
subtype: MessageToolUpdateType.Call,
@@ -92,8 +96,14 @@ async function* runTool(
9296
};
9397
}
9498

99+
MetricsServer.getMetrics().tool.toolUseDuration.observe(
100+
{ tool: call.name },
101+
Date.now() - startTime
102+
);
103+
95104
return { ...toolResult, call } as ToolResult;
96105
} catch (e) {
106+
MetricsServer.getMetrics().tool.toolUseCountError.inc({ tool: call.name });
97107
yield {
98108
type: MessageUpdateType.Tool,
99109
subtype: MessageToolUpdateType.Error,
@@ -102,6 +112,7 @@ async function* runTool(
102112
};
103113
}
104114
} catch (cause) {
115+
MetricsServer.getMetrics().tool.toolUseCountError.inc({ tool: call.name });
105116
console.error(Error(`Failed while running tool ${call.name}`), { cause });
106117
return {
107118
call,
@@ -126,6 +137,8 @@ export async function* runTools(
126137
};
127138
});
128139

140+
const pickToolStartTime = Date.now();
141+
129142
// do the function calling bits here
130143
for await (const output of await endpoint({
131144
messages: messagesWithFilesPrompt,
@@ -163,6 +176,11 @@ export async function* runTools(
163176
}
164177
}
165178

179+
MetricsServer.getMetrics().tool.timeToChooseTools.observe(
180+
{ model: conv.model },
181+
Date.now() - pickToolStartTime
182+
);
183+
166184
const toolContext: BackendToolContext = { conv, messages, preprompt, assistant };
167185
const toolResults: (ToolResult | undefined)[] = yield* mergeAsyncGenerators(
168186
calls.map((call) => runTool(toolContext, tools, call))

src/lib/server/websearch/embed/embed.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { MetricsServer } from "$lib/server/metrics";
12
import type { WebSearchScrapedSource, WebSearchUsedSource } from "$lib/types/WebSearch";
23
import type { EmbeddingBackendModel } from "../../embeddingModels";
34
import { getSentenceSimilarity, innerProduct } from "../../sentenceSimilarity";
@@ -14,6 +15,8 @@ export async function findContextSources(
1415
prompt: string,
1516
embeddingModel: EmbeddingBackendModel
1617
) {
18+
const startTime = Date.now();
19+
1720
const sourcesMarkdownElems = sources.map((source) => flattenTree(source.page.markdownTree));
1821
const markdownElems = sourcesMarkdownElems.flat();
1922

@@ -76,5 +79,7 @@ export async function findContextSources(
7679
})
7780
.filter((contextSource) => contextSource.context.length > 0);
7881

82+
MetricsServer.getMetrics().webSearch.embeddingDuration.observe(Date.now() - startTime);
83+
7984
return contextSources;
8085
}

src/lib/server/websearch/runWebSearch.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import {
1717
makeSourcesUpdate,
1818
} from "./update";
1919
import { mergeAsyncGenerators } from "$lib/utils/mergeAsyncGenerators";
20+
import { MetricsServer } from "../metrics";
2021

2122
const MAX_N_PAGES_TO_SCRAPE = 8 as const;
2223
const MAX_N_PAGES_TO_EMBED = 5 as const;
@@ -31,6 +32,8 @@ export async function* runWebSearch(
3132
const createdAt = new Date();
3233
const updatedAt = new Date();
3334

35+
MetricsServer.getMetrics().webSearch.requestCount.inc();
36+
3437
try {
3538
const embeddingModel =
3639
embeddingModels.find((m) => m.id === conv.embeddingModel) ?? defaultEmbeddingModel;

src/lib/server/websearch/scrape/scrape.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,24 @@ import { spatialParser } from "./parser";
66
import { htmlToMarkdownTree } from "../markdown/tree";
77
import { timeout } from "$lib/utils/timeout";
88
import { makeErrorUpdate, makeGeneralUpdate } from "../update";
9+
import { MetricsServer } from "$lib/server/metrics";
910

1011
export const scrape = (maxCharsPerElem: number) =>
1112
async function* (
1213
source: WebSearchSource
1314
): AsyncGenerator<MessageWebSearchUpdate, WebSearchScrapedSource | undefined, undefined> {
1415
try {
16+
const startTime = Date.now();
17+
MetricsServer.getMetrics().webSearch.pageFetchCount.inc();
18+
1519
const page = await scrapeUrl(source.link, maxCharsPerElem);
20+
21+
MetricsServer.getMetrics().webSearch.pageFetchDuration.observe(Date.now() - startTime);
22+
1623
yield makeGeneralUpdate({ message: "Browsing webpage", args: [source.link] });
1724
return { ...source, page };
1825
} catch (e) {
26+
MetricsServer.getMetrics().webSearch.pageFetchCountError.inc();
1927
const message = e instanceof Error ? e.message : String(e);
2028
yield makeErrorUpdate({ message: "Failed to parse webpage", args: [message, source.link] });
2129
}

src/routes/conversation/+server.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import { defaultEmbeddingModel } from "$lib/server/embeddingModels";
1010
import { v4 } from "uuid";
1111
import { authCondition } from "$lib/server/auth";
1212
import { usageLimits } from "$lib/server/usageLimits";
13+
import { MetricsServer } from "$lib/server/metrics";
1314

1415
export const POST: RequestHandler = async ({ locals, request }) => {
1516
const body = await request.text();
@@ -115,6 +116,8 @@ export const POST: RequestHandler = async ({ locals, request }) => {
115116
...(values.fromShare ? { meta: { fromShareId: values.fromShare } } : {}),
116117
});
117118

119+
MetricsServer.getMetrics().model.conversationsTotal.inc({ model: values.model });
120+
118121
return new Response(
119122
JSON.stringify({
120123
conversationId: res.insertedId.toString(),

src/routes/conversation/[id]/+server.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import { buildSubtree } from "$lib/utils/tree/buildSubtree.js";
2121
import { addChildren } from "$lib/utils/tree/addChildren.js";
2222
import { addSibling } from "$lib/utils/tree/addSibling.js";
2323
import { usageLimits } from "$lib/server/usageLimits";
24+
import { MetricsServer } from "$lib/server/metrics";
2425
import { textGeneration } from "$lib/server/textGeneration";
2526
import type { TextGenerationContext } from "$lib/server/textGeneration/types";
2627

@@ -293,6 +294,8 @@ export async function POST({ request, locals, params, getClientAddress }) {
293294

294295
let doneStreaming = false;
295296

297+
let lastTokenTimestamp: undefined | Date = undefined;
298+
296299
// we now build the stream
297300
const stream = new ReadableStream({
298301
async start(controller) {
@@ -306,6 +309,25 @@ export async function POST({ request, locals, params, getClientAddress }) {
306309
if (event.type === MessageUpdateType.Stream) {
307310
if (event.token === "") return;
308311
messageToWriteTo.content += event.token;
312+
313+
// add to token total
314+
MetricsServer.getMetrics().model.tokenCountTotal.inc({ model: model?.id });
315+
316+
// if this is the first token, add to time to first token
317+
if (!lastTokenTimestamp) {
318+
MetricsServer.getMetrics().model.timeToFirstToken.observe(
319+
{ model: model?.id },
320+
Date.now() - promptedAt.getTime()
321+
);
322+
lastTokenTimestamp = new Date();
323+
}
324+
325+
// add to time per token
326+
MetricsServer.getMetrics().model.timePerOutputToken.observe(
327+
{ model: model?.id },
328+
Date.now() - (lastTokenTimestamp ?? promptedAt).getTime()
329+
);
330+
lastTokenTimestamp = new Date();
309331
}
310332

311333
// Set the title
@@ -321,6 +343,12 @@ export async function POST({ request, locals, params, getClientAddress }) {
321343
else if (event.type === MessageUpdateType.FinalAnswer) {
322344
messageToWriteTo.interrupted = event.interrupted;
323345
messageToWriteTo.content = initialMessageContent + event.text;
346+
347+
// add to latency
348+
MetricsServer.getMetrics().model.latency.observe(
349+
{ model: model?.id },
350+
Date.now() - promptedAt.getTime()
351+
);
324352
}
325353

326354
// Add file
@@ -428,6 +456,8 @@ export async function POST({ request, locals, params, getClientAddress }) {
428456
);
429457
}
430458

459+
const metrics = MetricsServer.getMetrics();
460+
metrics.model.messagesTotal.inc({ model: model?.id });
431461
// Todo: maybe we should wait for the message to be saved before ending the response - in case of errors
432462
return new Response(stream, {
433463
headers: {

0 commit comments

Comments (0)