diff --git a/packages/mongodb-rag-core/src/DataStreamer.ts b/packages/mongodb-rag-core/src/DataStreamer.ts
index 95fd1f5ba..423e6ec21 100644
--- a/packages/mongodb-rag-core/src/DataStreamer.ts
+++ b/packages/mongodb-rag-core/src/DataStreamer.ts
@@ -1,12 +1,16 @@
 import { Response } from "express";
-import { OpenAiStreamingResponse } from "./llm";
 import { References } from "./References";
 import { logger } from "./logger";
+import OpenAI from "openai";
 
 export function escapeNewlines(str: string): string {
   return str.replaceAll(`\n`, `\\n`);
 }
 
+export type OpenAiStreamingResponse = AsyncIterable<
+  Omit<OpenAI.ChatCompletionChunk, "model">
+>;
+
 interface ServerSentEventDispatcher {
   connect(): void;
   disconnect(): void;
diff --git a/packages/mongodb-rag-core/src/index.ts b/packages/mongodb-rag-core/src/index.ts
index 594e32ba8..70b198069 100644
--- a/packages/mongodb-rag-core/src/index.ts
+++ b/packages/mongodb-rag-core/src/index.ts
@@ -1,5 +1,3 @@
-export * from "./llm";
-export * as Llm from "./llm";
 export * from "./chunk";
 export * as Chunk from "./chunk";
 export * from "./contentStore";
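
Note: `OpenAiStreamingResponse` moves into `DataStreamer.ts` and is now typed directly against the `openai` package rather than the deleted `./llm` module. A minimal sketch of how a consumer iterates the relocated type (the `collectStreamText` helper is illustrative, not part of this diff):

```ts
import OpenAI from "openai";

type OpenAiStreamingResponse = AsyncIterable<
  Omit<OpenAI.ChatCompletionChunk, "model">
>;

// Accumulate the streamed completion text from the chunk deltas.
async function collectStreamText(
  stream: OpenAiStreamingResponse
): Promise<string> {
  let text = "";
  for await (const chunk of stream) {
    text += chunk.choices[0]?.delta?.content ?? "";
  }
  return text;
}
```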
diff --git a/packages/mongodb-rag-core/src/llm/ChatLlm.ts b/packages/mongodb-rag-core/src/llm/ChatLlm.ts
deleted file mode 100644
index 6f1be4980..000000000
--- a/packages/mongodb-rag-core/src/llm/ChatLlm.ts
+++ /dev/null
@@ -1,153 +0,0 @@
-/**
-  @fileoverview This file contains the interfaces for the LLM service.
-  Note that the LLM service is based on the OpenAI API, so the interface borrow
-  from that. This interface could still work with non OpenAI providers if they
-  implement the same interface.
- */
-import { Request as ExpressRequest } from "express";
-import OpenAI from "openai";
-import { Reference } from "../References";
-import { Conversation } from "../conversations";
-import { DataStreamer } from "../DataStreamer";
-
-export type OpenAiMessageRole = "system" | "assistant" | "user" | "function";
-
-export type OpenAiChatMessage = OpenAI.ChatCompletionMessageParam & {
-  /**
-    The role of the message in the context of the conversation.
-   */
-  role: OpenAiMessageRole;
-
-  /**
-    The vector representation of the content.
-   */
-  embedding?: number[];
-  content: string | null;
-};
-
-export type SystemPrompt = OpenAiChatMessage & {
-  role: "system";
-  content: string;
-};
-
-export interface LlmAnswerQuestionParams {
-  messages: OpenAiChatMessage[];
-  toolCallOptions?: ToolCallDirective;
-}
-/**
-  Tool for the chatbot to use.
- */
-export interface Tool {
-  /**
-    Function definition for the LLM to invoke.
-   */
-  definition: OpenAI.FunctionDefinition;
-
-  /**
-    Call the function based on the arguments in the {@link Tool.definition}.
-   */
-  call(args: ToolCallParams): Promise<CallToolResponse>;
-}
-
-/**
-  Parameters for invoking a tool call.
- */
-export interface ToolCallParams {
-  functionArgs: unknown;
-
-  /**
-    Conversation in the DB. Useful for getting metadata to use in tool calls.
-   */
-  conversation?: Conversation;
-
-  /**
-    Data streamer with connection open to send events to the client.
-
-    For example, you could use this to send updates about
-    what the tool is doing to the client.
-
-   */
-  dataStreamer?: DataStreamer;
-
-  /**
-    Current Express.js request from the client.
-    Useful for getting metadata to use in tool calls.
-   */
-  request?: ExpressRequest;
-}
-
-export type OpenAIChatCompletionWithoutUsage = Omit<
-  OpenAI.ChatCompletion,
-  "usage"
->;
-
-export type OpenAiStreamingResponse = AsyncIterable<
-  Omit<OpenAI.ChatCompletionChunk, "model">
->;
-export type OpenAiAwaitedResponse = Partial<OpenAiChatMessage> &
-  Pick<OpenAiChatMessage, "content" | "role">;
-
-export interface CallToolResponse {
-  /**
-    Message to add to the conversation.
-   */
-  toolCallMessage: OpenAiChatMessage;
-
-  /**
-    If `true`, the user query should be rejected.
-    You can use this to short circuit the conversation,
-    and return the {@link ConversationConstants.NO_RELEVANT_CONTENT} message.
-   */
-  rejectUserQuery?: boolean;
-
-  /**
-    References to add to the {@link AssistantMessage} sent to the user.
-   */
-  references?: Reference[];
-}
-
-export type ToolCallDirective = OpenAI.ChatCompletionFunctionCallOption;
-
-/**
-  Parameters for invoking a tool call from the LLM.
- */
-export interface LlmCallToolParams {
-  /**
-    Messages to send to the LLM. The tool call invocation information
-    should be in the last message.
-   */
-  messages: OpenAiChatMessage[];
-
-  /**
-    Conversation in the DB. Useful for getting metadata to use in tool calls.
-   */
-  conversation?: Conversation;
-
-  /**
-    Data streamer with connection open to send events to the client.
-
-    For example, you could use this to send updates about
-    what the tool is doing to the client.
-
-   */
-  dataStreamer?: DataStreamer;
-
-  /**
-    Current Express.js request from the client.
-    Useful for getting metadata to use in tool calls.
-   */
-  request?: ExpressRequest;
-}
-
-/**
-  LLM that responds to user queries. Provides both streaming and awaited options.
- */
-export interface ChatLlm {
-  answerQuestionStream(
-    params: LlmAnswerQuestionParams
-  ): Promise<OpenAiStreamingResponse>;
-  answerQuestionAwaited(
-    params: LlmAnswerQuestionParams
-  ): Promise<OpenAiAwaitedResponse>;
-  callTool?(params: LlmCallToolParams): Promise<CallToolResponse>;
-}
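
The removed `ChatLlm` interface is what every call site touched by this PR was written against. For reference while auditing those call sites, a minimal conforming implementation looked roughly like this (an illustrative stub, not code from the repo; chunk fields reduced to the required subset):

```ts
import { ChatLlm } from "./ChatLlm";

// Echoes the last message back, in both awaited and streamed form.
const echoLlm: ChatLlm = {
  async answerQuestionAwaited({ messages }) {
    const last = messages[messages.length - 1];
    return { role: "assistant", content: last.content ?? "" };
  },
  async answerQuestionStream({ messages }) {
    const last = messages[messages.length - 1];
    return (async function* () {
      yield {
        id: "0",
        object: "chat.completion.chunk" as const,
        created: Date.now(),
        choices: [
          {
            index: 0,
            finish_reason: null,
            delta: { role: "assistant" as const, content: last.content ?? "" },
          },
        ],
      };
    })();
  },
};
```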
diff --git a/packages/mongodb-rag-core/src/llm/LangchainChatLlm.test.ts b/packages/mongodb-rag-core/src/llm/LangchainChatLlm.test.ts
deleted file mode 100644
index f7f9fab92..000000000
--- a/packages/mongodb-rag-core/src/llm/LangchainChatLlm.test.ts
+++ /dev/null
@@ -1,119 +0,0 @@
-import "dotenv/config";
-import { OpenAiChatMessage } from "./ChatLlm";
-import { makeLangchainChatLlm } from "./LangchainChatLlm";
-import { FakeListChatModel } from "@langchain/core/utils/testing";
-import { ChatOpenAI } from "@langchain/openai";
-import { ChatAnthropic } from "@langchain/anthropic";
-import { assertEnvVars } from "../assertEnvVars";
-import { CORE_OPENAI_CHAT_COMPLETION_ENV_VARS } from "../CoreEnvVars";
-
-jest.setTimeout(30000);
-
-const {
-  OPENAI_ENDPOINT,
-  OPENAI_API_KEY,
-  OPENAI_CHAT_COMPLETION_DEPLOYMENT,
-  OPENAI_CHAT_COMPLETION_MODEL_VERSION,
-} = assertEnvVars(CORE_OPENAI_CHAT_COMPLETION_ENV_VARS);
-
-const fakeResponses = ["I'll callback later.", "You 'console' them!"];
-const makeFakeChat = () =>
-  new FakeListChatModel({
-    responses: fakeResponses,
-  });
-const messages = [
-  { role: "user", content: "Tell me a JavaScript pun" },
-] satisfies OpenAiChatMessage[];
-
-describe("LangchainChatLlm", () => {
-  it("should generate response - awaited", async () => {
-    const langchainChatLlm = makeLangchainChatLlm({
-      chatModel: makeFakeChat(),
-    });
-    const { role, content } = await langchainChatLlm.answerQuestionAwaited({
-      messages,
-    });
-    expect(role).toBe("assistant");
-    expect(content).toBe(fakeResponses[0]);
-  });
-  it("should generate response - streamed", async () => {
-    const langchainChatLlm = makeLangchainChatLlm({
-      chatModel: makeFakeChat(),
-    });
-    const res = await langchainChatLlm.answerQuestionStream({
-      messages,
-    });
-    let content = "";
-    for await (const event of res) {
-      content += event.choices[0].delta?.content;
-    }
-    expect(content).toBe(fakeResponses[0]);
-  });
-  it("should work with Azure OpenAI", async () => {
-    const model = new ChatOpenAI({
-      azureOpenAIApiKey: OPENAI_API_KEY,
-      azureOpenAIApiDeploymentName: OPENAI_CHAT_COMPLETION_DEPLOYMENT,
-      azureOpenAIApiInstanceName: getAzureInstanceName(OPENAI_ENDPOINT),
-      azureOpenAIApiVersion: OPENAI_CHAT_COMPLETION_MODEL_VERSION,
-    });
-    const azureLangchainChatLlm = makeLangchainChatLlm({
-      chatModel: model,
-    });
-    const { role, content } = await azureLangchainChatLlm.answerQuestionAwaited(
-      {
-        messages,
-      }
-    );
-    expect(role).toBe("assistant");
-    expect(content).toBeTruthy();
-
-    const res = await azureLangchainChatLlm.answerQuestionStream({
-      messages,
-    });
-    let contentStream = "";
-    for await (const event of res) {
-      contentStream += event.choices[0].delta?.content;
-    }
-    expect(contentStream).toBeTruthy();
-  });
-
-  // Skipped because we do not have a MongoDB-org Anthropic subscription
-  it.skip("should work with Anthropic", async () => {
-    const model = new ChatAnthropic({
-      temperature: 0.9,
-      anthropicApiKey: process.env.ANTHROPIC_API_KEY,
-      maxTokens: 1024,
-    });
-    const azureLangchainChatLlm = makeLangchainChatLlm({
-      chatModel: model,
-    });
-    const { role, content } = await azureLangchainChatLlm.answerQuestionAwaited(
-      {
-        messages,
-      }
-    );
-    expect(role).toBe("assistant");
-    expect(content).toBeTruthy();
-
-    const res = await azureLangchainChatLlm.answerQuestionStream({
-      messages,
-    });
-    let contentStream = "";
-    for await (const event of res) {
-      contentStream += event.choices[0].delta?.content;
-    }
-    expect(contentStream).toBeTruthy();
-  });
-});
-
-function getAzureInstanceName(azureDeploymenUrl: string) {
-  // Parse the URL using the URL API
-  const url = new URL(azureDeploymenUrl);
-
-  // Extract the hostname
-  const hostname = url.hostname;
-
-  // Assuming the format is always [subdomain].openai.azure.com, split by "." and take the first part
-  const subdomain = hostname.split(".")[0];
-  return subdomain;
-}
diff --git a/packages/mongodb-rag-core/src/llm/LangchainChatLlm.ts b/packages/mongodb-rag-core/src/llm/LangchainChatLlm.ts
deleted file mode 100644
index b99f3adf9..000000000
--- a/packages/mongodb-rag-core/src/llm/LangchainChatLlm.ts
+++ /dev/null
@@ -1,75 +0,0 @@
-import {
-  BaseChatModel,
-  BaseChatModelCallOptions,
-} from "@langchain/core/language_models/chat_models";
-import {
-  BaseMessagePromptTemplateLike,
-  ChatPromptTemplate,
-} from "@langchain/core/prompts";
-import { ChatLlm, OpenAiChatMessage } from "./ChatLlm";
-import { AssistantMessage } from "../conversations";
-
-export interface MakeLangchainChatLlmProps {
-  chatModel: BaseChatModel;
-  callOptions?: BaseChatModelCallOptions;
-}
-
-/**
-  Use any Langchain JS [`ChatModel`](https://js.langchain.com/docs/modules/model_io/chat/)
-  to talk to an LLM.
-
-  Note: This ChatLLM does not currently support tool calling.
- */
-export function makeLangchainChatLlm({
-  chatModel,
-  callOptions,
-}: MakeLangchainChatLlmProps): ChatLlm {
-  return {
-    async answerQuestionAwaited({ messages }) {
-      const prompts = ChatPromptTemplate.fromMessages(
-        messages.map((m) => messageBaseMessagePromptTemplateLike(m))
-      );
-      const chain = prompts.pipe(chatModel);
-      const res = await chain.invoke({}, callOptions);
-      return {
-        role: "assistant",
-        content: typeof res.content === "string" ? res.content : "",
-      } satisfies AssistantMessage;
-    },
-    answerQuestionStream: async ({ messages }) =>
-      (async function* () {
-        const prompts = ChatPromptTemplate.fromMessages(
-          messages.map((m) => messageBaseMessagePromptTemplateLike(m))
-        );
-        const chain = prompts.pipe(chatModel);
-        const stream = await chain.stream({}, callOptions);
-        let index = 0;
-        for await (const chunk of stream) {
-          index++;
-          yield {
-            id: index.toString(),
-            created: Date.now(),
-            choices: [
-              {
-                finish_reason: null,
-                index: index,
-                delta: {
-                  role: "assistant",
-                  content:
-                    typeof chunk.content === "string" ? chunk.content : "",
-                  tool_calls: [],
-                },
-              },
-            ],
-            promptFilterResults: [],
-          };
-        }
-      })(),
-  };
-}
-
-function messageBaseMessagePromptTemplateLike(
-  message: OpenAiChatMessage
-): BaseMessagePromptTemplateLike {
-  return [message.role, message.content ?? ""];
-}
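
Both removed `ChatLlm` adapters (Langchain above, OpenAI below) are superseded by the Vercel AI SDK re-exported from `mongodb-rag-core/aiSdk`. A rough AI-SDK counterpart of the removed streaming path, assuming the re-export also includes `streamText` (this diff only shows `generateText` and `LanguageModel`):

```ts
import { streamText, LanguageModel } from "mongodb-rag-core/aiSdk";

// Stream an answer delta-by-delta, replacing answerQuestionStream.
async function streamAnswer(model: LanguageModel, question: string) {
  const { textStream } = streamText({
    model,
    messages: [{ role: "user", content: question }],
  });
  for await (const delta of textStream) {
    process.stdout.write(delta);
  }
}
```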
diff --git a/packages/mongodb-rag-core/src/llm/OpenAiChatLlm.test.ts b/packages/mongodb-rag-core/src/llm/OpenAiChatLlm.test.ts
deleted file mode 100644
index 612bc54e2..000000000
--- a/packages/mongodb-rag-core/src/llm/OpenAiChatLlm.test.ts
+++ /dev/null
@@ -1,183 +0,0 @@
-import "dotenv/config";
-import { AzureOpenAI } from "openai";
-import { ChatLlm, OpenAiChatMessage, Tool } from "./ChatLlm";
-import { makeOpenAiChatLlm } from "./OpenAiChatLlm";
-import { assertEnvVars } from "../assertEnvVars";
-import { CORE_ENV_VARS } from "../CoreEnvVars";
-import { strict as assert } from "assert";
-import { SystemMessage } from "../conversations";
-
-const systemPrompt = {
-  role: "system",
-  content: "You shall do as you're told",
-} satisfies SystemMessage;
-jest.setTimeout(30000);
-const {
-  OPENAI_ENDPOINT,
-  OPENAI_API_KEY,
-  OPENAI_CHAT_COMPLETION_DEPLOYMENT,
-  OPENAI_API_VERSION,
-} = assertEnvVars(CORE_ENV_VARS);
-
-const conversation = [
-  systemPrompt,
-  {
-    role: "user",
-    content: "How do I connect to my cluster?",
-  },
-] as OpenAiChatMessage[];
-const testTools = [
-  {
-    definition: {
-      name: "test_tool",
-      description: "Test tool",
-      parameters: {
-        type: "object",
-        properties: {
-          test: {
-            description: "Test parameter. Always be the string 'test'.",
-            example: "test",
-            type: "string",
-          },
-        },
-        required: ["test"],
-      },
-    },
-    async call() {
-      return {
-        toolCallMessage: {
-          role: "assistant",
-          name: "test_tool",
-          content: "Test tool called",
-        },
-        rejectUserQuery: false,
-        references: [
-          {
-            title: "test",
-            url: "https://docs.mongodb.com",
-          },
-        ],
-      };
-    },
-  },
-] satisfies Tool[];
-
-const openAiClient = new AzureOpenAI({
-  apiKey: OPENAI_API_KEY,
-  endpoint: OPENAI_ENDPOINT,
-  apiVersion: OPENAI_API_VERSION,
-});
-const toolOpenAiLlm = makeOpenAiChatLlm({
-  openAiClient,
-  deployment: OPENAI_CHAT_COMPLETION_DEPLOYMENT,
-  openAiLmmConfigOptions: {
-    temperature: 0,
-    max_tokens: 500,
-    function_call: "none",
-  },
-  tools: testTools,
-});
-
-describe("OpenAiLlm", () => {
-  let openAiLlmService: ChatLlm;
-  beforeAll(() => {
-    openAiLlmService = toolOpenAiLlm;
-  });
-
-  test("should answer question in conversation - awaited", async () => {
-    const response = await openAiLlmService.answerQuestionAwaited({
-      messages: conversation,
-    });
-    expect(response.role).toBe("assistant");
-    expect(typeof response.content).toBe("string");
-  });
-
-  test("should answer question in conversation - streamed", async () => {
-    const events = await openAiLlmService.answerQuestionStream({
-      messages: conversation,
-    });
-    let count = 0;
-    let message = "";
-    await (async () => {
-      for await (const event of events) {
-        count++;
-        for (const choice of event.choices) {
-          const delta = choice.delta?.content;
-          if (delta !== undefined) {
-            message += delta;
-          }
-        }
-      }
-    })();
-    expect(count).toBeGreaterThan(10);
-    expect(typeof message).toBe("string");
-  });
-
-  test("should call tool", async () => {
-    const response = await toolOpenAiLlm.answerQuestionAwaited({
-      messages: [
-        {
-          role: "user",
-          content: "hi",
-        },
-      ],
-      toolCallOptions: {
-        name: testTools[0].definition.name,
-      },
-    });
-    assert(
-      response.role === "assistant" && response.function_call !== undefined
-    );
-    const toolResponse = await toolOpenAiLlm.callTool({ messages: [response] });
-    expect(response.role).toBe("assistant");
-    expect(response.function_call?.name).toBe("test_tool");
-    expect(response.function_call?.arguments).toBeTruthy();
-    expect(JSON.parse(response.function_call?.arguments ?? "")).toStrictEqual({
-      test: "test",
-    });
-    expect(toolResponse).toStrictEqual({
-      toolCallMessage: {
-        role: "assistant",
-        name: "test_tool",
-        content: "Test tool called",
-      },
-      rejectUserQuery: false,
-      references: [
-        {
-          title: "test",
-          url: "https://docs.mongodb.com",
-        },
-      ],
-    });
-  });
-  test("should throw error if calls tool that does not exist", async () => {
-    await expect(
-      toolOpenAiLlm.callTool({
-        messages: [
-          {
-            role: "assistant",
-            function_call: {
-              name: "not_a_tool",
-              arguments: JSON.stringify({
-                test: "test",
-              }),
-            },
-            content: "",
-          },
-        ],
-      })
-    ).rejects.toThrow("Tool not found");
-  });
-  test("should throw error if calls a tool on a message that isn't a tool call", async () => {
-    await expect(
-      toolOpenAiLlm.callTool({
-        messages: [
-          {
-            role: "assistant",
-            content: "",
-          },
-        ],
-      })
-    ).rejects.toThrow("Message must be a tool call");
-  });
-});
"")).toStrictEqual({ - test: "test", - }); - expect(toolResponse).toStrictEqual({ - toolCallMessage: { - role: "assistant", - name: "test_tool", - content: "Test tool called", - }, - rejectUserQuery: false, - references: [ - { - title: "test", - url: "https://docs.mongodb.com", - }, - ], - }); - }); - test("should throw error if calls tool that does not exist", async () => { - await expect( - toolOpenAiLlm.callTool({ - messages: [ - { - role: "assistant", - function_call: { - name: "not_a_tool", - arguments: JSON.stringify({ - test: "test", - }), - }, - content: "", - }, - ], - }) - ).rejects.toThrow("Tool not found"); - }); - test("should throw error if calls a tool on a message that isn't a tool call", async () => { - await expect( - toolOpenAiLlm.callTool({ - messages: [ - { - role: "assistant", - content: "", - }, - ], - }) - ).rejects.toThrow("Message must be a tool call"); - }); -}); diff --git a/packages/mongodb-rag-core/src/llm/OpenAiChatLlm.ts b/packages/mongodb-rag-core/src/llm/OpenAiChatLlm.ts deleted file mode 100644 index 02e8c594f..000000000 --- a/packages/mongodb-rag-core/src/llm/OpenAiChatLlm.ts +++ /dev/null @@ -1,96 +0,0 @@ -import OpenAI from "openai"; -import { strict as assert } from "assert"; -import { ChatLlm, LlmAnswerQuestionParams, Tool } from "./ChatLlm"; - -/** - Configuration for the {@link makeOpenAiChatLlm} function. - */ -export interface MakeOpenAiChatLlmParams { - deployment: string; - openAiClient: OpenAI; - openAiLmmConfigOptions?: Omit< - OpenAI.ChatCompletionCreateParams, - "model" | "messages" - >; - tools?: Tool[]; -} - -/** - Construct the {@link ChatLlm} service using the [OpenAI client](https://www.npmjs.com/package/openai). - */ -export function makeOpenAiChatLlm({ - deployment, - openAiClient, - openAiLmmConfigOptions, - tools, -}: MakeOpenAiChatLlmParams): Required { - const toolDict: { [key: string]: Tool } = {}; - tools?.forEach((tool) => { - const name = tool.definition.name; - toolDict[name] = tool; - }); - - return { - async answerQuestionStream({ - messages, - toolCallOptions, - }: LlmAnswerQuestionParams) { - const completionStream = await openAiClient.chat.completions.create({ - model: deployment, - messages, - ...(openAiLmmConfigOptions ?? {}), - ...(toolCallOptions ? { function_call: toolCallOptions } : {}), - functions: tools?.map((tool) => tool.definition), - stream: true, - }); - return completionStream; - }, - async answerQuestionAwaited({ - messages, - toolCallOptions, - }: LlmAnswerQuestionParams) { - const { - choices: [choice], - } = await openAiClient.chat.completions.create({ - model: deployment, - messages, - ...(openAiLmmConfigOptions ?? {}), - ...(toolCallOptions ? { function_call: toolCallOptions } : {}), - functions: tools?.map((tool) => tool.definition), - stream: false, - }); - const { message } = choice; - if (!message) { - throw new Error("No message returned from OpenAI"); - } - return message; - }, - async callTool({ messages, conversation, dataStreamer, request }) { - const lastMessage = messages[messages.length - 1]; - // Only call tool if the message is an assistant message with a function call. 
diff --git a/packages/scripts/src/findFaq.ts b/packages/scripts/src/findFaq.ts
index 3077ef32c..de3b6cd37 100644
--- a/packages/scripts/src/findFaq.ts
+++ b/packages/scripts/src/findFaq.ts
@@ -10,10 +10,10 @@ import {
   VectorStore,
   FindNearestNeighborsOptions,
   WithScore,
-  ChatLlm,
 } from "mongodb-chatbot-server";
 import { clusterize, DbscanOptions } from "./clusterize";
 import { findCentroid } from "./findCentroid";
+import { generateText, LanguageModel } from "mongodb-rag-core/aiSdk";
 
 export type ResponseMessage = AssistantMessage | ToolMessage;
 
@@ -298,12 +298,13 @@ export const assignRepresentativeQuestion = async ({
   llm,
 }: {
   faq: FaqEntry[];
-  llm: ChatLlm;
+  llm: LanguageModel;
 }): Promise<FaqEntry[]> => {
   return await Promise.all(
     faq.map(async (q) => {
       try {
-        const representativeQuestion = await llm.answerQuestionAwaited({
+        const { text: representativeQuestion } = await generateText({
+          model: llm,
           messages: [
             {
               role: "user",
@@ -314,12 +315,9 @@
           ],
         });
         console.log(
-          `Generated representative question: "${q.question}" -> "${representativeQuestion.content}"`
+          `Generated representative question: "${q.question}" -> "${representativeQuestion}"`
         );
-        if (representativeQuestion.content === null) {
-          throw new Error("llm returned null!");
-        }
-        return { ...q, question: representativeQuestion.content };
+        return { ...q, question: representativeQuestion };
       } catch (error) {
         console.warn(
           `Failed to generate representation question for '${q.question}': ${