Skip to content

Commit 98e29fe

Browse files
mongodben (Ben Perlmutter) and Ben Perlmutter authored
(EAI-984): Update chatbot preprocessors to use GPT-4.1-nano + guardrail false positive fix (#684)
* update preprocessors for gpt-4.1-mini
* update guardrail usage to fit new API
* add new eval case
* update preprocessors to use 4.1-nano
* fix test for new guardrail return type
* remove unused imports

---------

Co-authored-by: Ben Perlmutter <mongodben@mongodb.com>
1 parent a7d4db0 commit 98e29fe

12 files changed

+304
-179
lines changed

packages/chatbot-server-mongodb-public/environments/production.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ env:
88
OPENAI_CHAT_COMPLETION_MODEL_VERSION: 2023-06-01-preview
99
ALLOWED_ORIGINS: https://mongodb.com,https://www.mongodb.com,https://learn.mongodb.com
1010
NODE_ENV: production
11-
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT: gpt-4o-mini
11+
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT: gpt-4.1-nano
1212
OPENAI_API_VERSION: "2024-06-01"
1313
OPENAI_CHAT_COMPLETION_DEPLOYMENT: gpt-4o
1414
OPENAI_VERIFIED_ANSWER_EMBEDDING_DEPLOYMENT: "docs-chatbot-embedding-ada-002"

packages/chatbot-server-mongodb-public/environments/staging.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ env:
88
OPENAI_CHAT_COMPLETION_MODEL_VERSION: 2023-06-01-preview
99
ALLOWED_ORIGINS: https://knowledge.staging.corp.mongodb.com,https://docs-mongodborg-staging.corp.mongodb.com,https://mongodbcom-cdn.website.staging.corp.mongodb.com,https://docs-mongodb-org-stg.s3.us-east-2.amazonaws.com,https://learn-staging.mongodb.com,https://mongodbcom-cdn.staging.corp.mongodb.com,https://devcenter-chatbot.devrel.staging.corp.mongodb.com
1010
NODE_ENV: staging
11-
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT: gpt-4o-mini
11+
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT: gpt-4.1-nano
1212
OPENAI_API_VERSION: "2024-06-01"
1313
OPENAI_CHAT_COMPLETION_DEPLOYMENT: gpt-4o
1414
OPENAI_VERIFIED_ANSWER_EMBEDDING_DEPLOYMENT: "docs-chatbot-embedding-ada-002"

packages/chatbot-server-mongodb-public/src/eval/evalHelpers.ts

+8-5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {
88
import { EVAL_ENV_VARS } from "../EnvVars";
99
import { AzureOpenAI } from "mongodb-rag-core/openai";
1010
import { strict as assert } from "assert";
11+
import { wrapOpenAI } from "mongodb-rag-core/braintrust";
1112

1213
export const {
1314
JUDGE_EMBEDDING_MODEL,
@@ -26,11 +27,13 @@ export const {
2627
OPENAI_API_VERSION: "",
2728
});
2829

29-
export const openAiClient = new AzureOpenAI({
30-
apiKey: OPENAI_API_KEY,
31-
endpoint: OPENAI_ENDPOINT,
32-
apiVersion: OPENAI_API_VERSION,
33-
});
30+
export const openAiClient = wrapOpenAI(
31+
new AzureOpenAI({
32+
apiKey: OPENAI_API_KEY,
33+
endpoint: OPENAI_ENDPOINT,
34+
apiVersion: OPENAI_API_VERSION,
35+
})
36+
);
3437

3538
export function getLastUserMessageFromMessages(
3639
messages: SomeMessage[]

packages/chatbot-server-mongodb-public/src/processors/extractMongoDbMetadataFromUserMessage.eval.ts

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1+
import "dotenv/config";
12
import {
23
extractMongoDbMetadataFromUserMessage,
34
ExtractMongoDbMetadataFunction,
45
} from "./extractMongoDbMetadataFromUserMessage";
5-
import { Eval } from "braintrust";
6+
import { Eval } from "mongodb-rag-core/braintrust";
67
import { Scorer } from "autoevals";
78
import { MongoDbTag } from "../mongoDbMetadata";
89
import {
9-
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT,
1010
openAiClient,
11+
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT,
1112
} from "../eval/evalHelpers";
1213

1314
interface ExtractMongoDbMetadataEvalCase {
@@ -210,10 +211,10 @@ Eval("extract-mongodb-metadata", {
210211
experimentName: model,
211212
metadata: {
212213
description:
213-
"Evaluates whether the MongoDB user message guardrail is working correctly.",
214+
"Evaluates whether the MongoDB user message metadata extractor is working correctly.",
214215
model,
215216
},
216-
maxConcurrency: 3,
217+
maxConcurrency: 15,
217218
timeout: 20000,
218219
async task(input) {
219220
try {

packages/chatbot-server-mongodb-public/src/processors/extractMongoDbMetadataFromUserMessage.ts

+1-2
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,7 @@ const name = "extract_mongodb_metadata";
3737
const description = "Extract MongoDB-related metadata from a user message";
3838

3939
const systemPrompt = `You are an expert data labeler employed by MongoDB.
40-
You must label metadata about the user query based on its context in the conversation.
41-
Your pay is determined by the accuracy of your labels as judged against other expert labelers, so do excellent work to maximize your earnings to support your family.`;
40+
You must label metadata about the user query based on its context in the conversation.`;
4241

4342
const fewShotExamples: OpenAI.Chat.ChatCompletionMessageParam[] = [
4443
// Example 1

packages/chatbot-server-mongodb-public/src/processors/makeFewShotUserMessageExtractorFunction.ts

+10-7
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ export interface MakeFewShotUserMessageExtractorFunctionParams<
1212
schema: T;
1313
};
1414
systemPrompt: string;
15-
fewShotExamples: OpenAI.ChatCompletionMessageParam[];
15+
fewShotExamples?: OpenAI.ChatCompletionMessageParam[];
1616
}
1717

1818
/**
@@ -64,7 +64,7 @@ export function makeFewShotUserMessageExtractorFunction<
6464
Original user message: ${userMessageText}`.trim(),
6565
} satisfies OpenAI.ChatCompletionMessageParam;
6666
const res = await openAiClient.chat.completions.create({
67-
messages: [systemPromptMessage, ...fewShotExamples, userMessage],
67+
messages: [systemPromptMessage, ...(fewShotExamples ?? []), userMessage],
6868
temperature: 0,
6969
model,
7070
tools: [toolDefinition],
@@ -74,12 +74,15 @@ export function makeFewShotUserMessageExtractorFunction<
7474
},
7575
stream: false,
7676
});
77-
const metadata = schema.parse(
78-
JSON.parse(
79-
res.choices[0]?.message?.tool_calls?.[0]?.function.arguments ?? "{}"
80-
)
77+
const resToolCall = JSON.parse(
78+
res.choices[0]?.message?.tool_calls?.[0]?.function.arguments ?? "{}"
8179
);
82-
return metadata;
80+
const metadata = schema.safeParse(resToolCall);
81+
// Return the raw tool call if it fails to parse
82+
if (!metadata.success) {
83+
return resToolCall as unknown as z.infer<T>;
84+
}
85+
return metadata.data;
8386
};
8487
}
8588

packages/chatbot-server-mongodb-public/src/processors/makeStepBackRagGenerateUserPrompt.ts

+5-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,10 @@ export const makeStepBackRagGenerateUserPrompt = ({
7373
messages: precedingMessagesToInclude,
7474
}),
7575
]);
76-
if (guardrailResult.rejectMessage) {
76+
if (
77+
guardrailResult.type === "inappropriate" ||
78+
guardrailResult.type === "irrelevant"
79+
) {
7780
const { reasoning } = guardrailResult;
7881
logRequest({
7982
reqId,
@@ -89,6 +92,7 @@ export const makeStepBackRagGenerateUserPrompt = ({
8992
rejectQuery: true,
9093
customData: {
9194
rejectionReason: reasoning,
95+
rejectionType: guardrailResult.type,
9296
},
9397
} satisfies UserMessage,
9498
rejectQuery: true,

packages/chatbot-server-mongodb-public/src/processors/makeStepBackUserQuery.eval.ts

+18-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { Scorer, EmbeddingSimilarity } from "autoevals";
2-
import { Eval } from "braintrust";
2+
import { Eval } from "mongodb-rag-core/braintrust";
33
import {
44
makeStepBackUserQuery,
55
StepBackUserQueryMongoDbFunction,
@@ -17,7 +17,7 @@ import {
1717
} from "../eval/evalHelpers";
1818

1919
interface ExtractMongoDbMetadataEvalCase {
20-
name: string;
20+
name?: string;
2121
input: {
2222
previousMessages?: Message[];
2323
userMessageText: string;
@@ -46,7 +46,10 @@ const evalCases: ExtractMongoDbMetadataEvalCase[] = [
4646
{
4747
name: "should step back based on previous messages",
4848
input: {
49-
userMessageText: "code example",
49+
userMessageText: updateFrontMatter("code example", {
50+
programmingLanguage: "javascript",
51+
mongoDbProduct: "Driver",
52+
}),
5053
previousMessages: [
5154
{
5255
role: "user",
@@ -142,6 +145,17 @@ const evalCases: ExtractMongoDbMetadataEvalCase[] = [
142145
} satisfies StepBackUserQueryMongoDbFunction,
143146
tags: ["performance", "indexes"],
144147
},
148+
{
149+
input: {
150+
userMessageText: updateFrontMatter("langchain quickstart", {
151+
mongoDbProduct: "Drivers",
152+
programmingLanguage: "python",
153+
}),
154+
},
155+
expected: {
156+
transformedUserQuery: "How do I get started with LangChain?",
157+
} satisfies StepBackUserQueryMongoDbFunction,
158+
},
145159
];
146160

147161
const QuerySimilarity: Scorer<
@@ -170,8 +184,7 @@ Eval("step-back-user-query", {
170184
"Evaluate the function that mutates the user query for better search results.",
171185
model,
172186
},
173-
maxConcurrency: 3,
174-
timeout: 20000,
187+
maxConcurrency: 10,
175188
async task(input) {
176189
try {
177190
return await makeStepBackUserQuery({

packages/chatbot-server-mongodb-public/src/processors/retrieveRelevantContent.eval.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ const { k } = retrievalConfig.findNearestNeighborsOptions;
6969

7070
const retrieveRelevantContentEvalTask: EvalTask<
7171
RetrievalEvalCaseInput,
72-
RetrievalTaskOutput
72+
RetrievalTaskOutput,
73+
unknown
7374
> = async function (data) {
7475
const metadataForQuery = await extractMongoDbMetadataFromUserMessage({
7576
openAiClient: preprocessorOpenAiClient,

0 commit comments

Comments
 (0)