Skip to content

(EAI-984): Update chatbot preprocessors to use GPT-4.1-nano + guardrail false positive fix #684

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ env:
OPENAI_CHAT_COMPLETION_MODEL_VERSION: 2023-06-01-preview
ALLOWED_ORIGINS: https://mongodb.com,https://www.mongodb.com,https://learn.mongodb.com
NODE_ENV: production
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT: gpt-4o-mini
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT: gpt-4.1-nano
OPENAI_API_VERSION: "2024-06-01"
OPENAI_CHAT_COMPLETION_DEPLOYMENT: gpt-4o
OPENAI_VERIFIED_ANSWER_EMBEDDING_DEPLOYMENT: "docs-chatbot-embedding-ada-002"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ env:
OPENAI_CHAT_COMPLETION_MODEL_VERSION: 2023-06-01-preview
ALLOWED_ORIGINS: https://knowledge.staging.corp.mongodb.com,https://docs-mongodborg-staging.corp.mongodb.com,https://mongodbcom-cdn.website.staging.corp.mongodb.com,https://docs-mongodb-org-stg.s3.us-east-2.amazonaws.com,https://learn-staging.mongodb.com,https://mongodbcom-cdn.staging.corp.mongodb.com,https://devcenter-chatbot.devrel.staging.corp.mongodb.com
NODE_ENV: staging
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT: gpt-4o-mini
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT: gpt-4.1-nano
OPENAI_API_VERSION: "2024-06-01"
OPENAI_CHAT_COMPLETION_DEPLOYMENT: gpt-4o
OPENAI_VERIFIED_ANSWER_EMBEDDING_DEPLOYMENT: "docs-chatbot-embedding-ada-002"
Expand Down
13 changes: 8 additions & 5 deletions packages/chatbot-server-mongodb-public/src/eval/evalHelpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
import { EVAL_ENV_VARS } from "../EnvVars";
import { AzureOpenAI } from "mongodb-rag-core/openai";
import { strict as assert } from "assert";
import { wrapOpenAI } from "mongodb-rag-core/braintrust";

export const {
JUDGE_EMBEDDING_MODEL,
Expand All @@ -26,11 +27,13 @@ export const {
OPENAI_API_VERSION: "",
});

export const openAiClient = new AzureOpenAI({
apiKey: OPENAI_API_KEY,
endpoint: OPENAI_ENDPOINT,
apiVersion: OPENAI_API_VERSION,
});
/**
  Azure OpenAI client exported for the eval code.

  Built from the OPENAI_API_KEY, OPENAI_ENDPOINT and OPENAI_API_VERSION
  values and passed through `wrapOpenAI` from "mongodb-rag-core/braintrust"
  before being exported.
  NOTE(review): presumably the wrapper is there so that calls made through
  this client are logged/traced by Braintrust — confirm against the
  wrapper's documentation.
 */
export const openAiClient = wrapOpenAI(
new AzureOpenAI({
apiKey: OPENAI_API_KEY,
endpoint: OPENAI_ENDPOINT,
apiVersion: OPENAI_API_VERSION,
})
);

export function getLastUserMessageFromMessages(
messages: SomeMessage[]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import "dotenv/config";
import {
extractMongoDbMetadataFromUserMessage,
ExtractMongoDbMetadataFunction,
} from "./extractMongoDbMetadataFromUserMessage";
import { Eval } from "braintrust";
import { Eval } from "mongodb-rag-core/braintrust";
import { Scorer } from "autoevals";
import { MongoDbTag } from "../mongoDbMetadata";
import {
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT,
openAiClient,
OPENAI_PREPROCESSOR_CHAT_COMPLETION_DEPLOYMENT,
} from "../eval/evalHelpers";

interface ExtractMongoDbMetadataEvalCase {
Expand Down Expand Up @@ -210,10 +211,10 @@ Eval("extract-mongodb-metadata", {
experimentName: model,
metadata: {
description:
"Evaluates whether the MongoDB user message guardrail is working correctly.",
"Evaluates whether the MongoDB user message metadata extractor is working correctly.",
model,
},
maxConcurrency: 3,
maxConcurrency: 15,
timeout: 20000,
async task(input) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ const name = "extract_mongodb_metadata";
const description = "Extract MongoDB-related metadata from a user message";

const systemPrompt = `You are an expert data labeler employed by MongoDB.
You must label metadata about the user query based on its context in the conversation.
Your pay is determined by the accuracy of your labels as judged against other expert labelers, so do excellent work to maximize your earnings to support your family.`;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Heh, did we decide this doesn't help?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The evals determined that 😄

You must label metadata about the user query based on its context in the conversation.`;

const fewShotExamples: OpenAI.Chat.ChatCompletionMessageParam[] = [
// Example 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export interface MakeFewShotUserMessageExtractorFunctionParams<
schema: T;
};
systemPrompt: string;
fewShotExamples: OpenAI.ChatCompletionMessageParam[];
fewShotExamples?: OpenAI.ChatCompletionMessageParam[];
}

/**
Expand Down Expand Up @@ -64,7 +64,7 @@ export function makeFewShotUserMessageExtractorFunction<
Original user message: ${userMessageText}`.trim(),
} satisfies OpenAI.ChatCompletionMessageParam;
const res = await openAiClient.chat.completions.create({
messages: [systemPromptMessage, ...fewShotExamples, userMessage],
messages: [systemPromptMessage, ...(fewShotExamples ?? []), userMessage],
temperature: 0,
model,
tools: [toolDefinition],
Expand All @@ -74,12 +74,15 @@ export function makeFewShotUserMessageExtractorFunction<
},
stream: false,
});
const metadata = schema.parse(
JSON.parse(
res.choices[0]?.message?.tool_calls?.[0]?.function.arguments ?? "{}"
)
const resToolCall = JSON.parse(
res.choices[0]?.message?.tool_calls?.[0]?.function.arguments ?? "{}"
);
return metadata;
const metadata = schema.safeParse(resToolCall);
// Return the raw tool call if it fails to parse
if (!metadata.success) {
return resToolCall as unknown as z.infer<T>;
}
return metadata.data;
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,10 @@ export const makeStepBackRagGenerateUserPrompt = ({
messages: precedingMessagesToInclude,
}),
]);
if (guardrailResult.rejectMessage) {
if (
guardrailResult.type === "inappropriate" ||
guardrailResult.type === "irrelevant"
) {
const { reasoning } = guardrailResult;
logRequest({
reqId,
Expand All @@ -89,6 +92,7 @@ export const makeStepBackRagGenerateUserPrompt = ({
rejectQuery: true,
customData: {
rejectionReason: reasoning,
rejectionType: guardrailResult.type,
},
} satisfies UserMessage,
rejectQuery: true,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Scorer, EmbeddingSimilarity } from "autoevals";
import { Eval } from "braintrust";
import { Eval } from "mongodb-rag-core/braintrust";
import {
makeStepBackUserQuery,
StepBackUserQueryMongoDbFunction,
Expand All @@ -17,7 +17,7 @@ import {
} from "../eval/evalHelpers";

interface ExtractMongoDbMetadataEvalCase {
name: string;
name?: string;
input: {
previousMessages?: Message[];
userMessageText: string;
Expand Down Expand Up @@ -46,7 +46,10 @@ const evalCases: ExtractMongoDbMetadataEvalCase[] = [
{
name: "should step back based on previous messages",
input: {
userMessageText: "code example",
userMessageText: updateFrontMatter("code example", {
programmingLanguage: "javascript",
mongoDbProduct: "Driver",
}),
previousMessages: [
{
role: "user",
Expand Down Expand Up @@ -142,6 +145,17 @@ const evalCases: ExtractMongoDbMetadataEvalCase[] = [
} satisfies StepBackUserQueryMongoDbFunction,
tags: ["performance", "indexes"],
},
{
input: {
userMessageText: updateFrontMatter("langchain quickstart", {
mongoDbProduct: "Drivers",
programmingLanguage: "python",
}),
},
expected: {
transformedUserQuery: "How do I get started with LangChain?",
} satisfies StepBackUserQueryMongoDbFunction,
},
];

const QuerySimilarity: Scorer<
Expand Down Expand Up @@ -170,8 +184,7 @@ Eval("step-back-user-query", {
"Evaluate the function that mutates the user query for better search results.",
model,
},
maxConcurrency: 3,
timeout: 20000,
maxConcurrency: 10,
async task(input) {
try {
return await makeStepBackUserQuery({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ const { k } = retrievalConfig.findNearestNeighborsOptions;

const retrieveRelevantContentEvalTask: EvalTask<
RetrievalEvalCaseInput,
RetrievalTaskOutput
RetrievalTaskOutput,
unknown
> = async function (data) {
const metadataForQuery = await extractMongoDbMetadataFromUserMessage({
openAiClient: preprocessorOpenAiClient,
Expand Down
Loading