fix: catch errors more gracefully in reasoning specific calls

nsarrazin · nsarrazin · commit 71aae58ad073 · 2024-12-16T16:41:44.000Z
diff --git a/src/lib/server/textGeneration/generate.ts b/src/lib/server/textGeneration/generate.ts
@@ -9,6 +9,7 @@ import type { TextGenerationContext } from "./types";
 import type { EndpointMessage } from "../endpoints/endpoints";
 import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
 import { generateSummaryOfReasoning } from "./reasoning";
+import { logger } from "../logger";
 
 type GenerateContext = Omit<TextGenerationContext, "messages"> & { messages: EndpointMessage[] };
 
@@ -69,30 +70,35 @@ export async function* generate(
 					subtype: MessageReasoningUpdateType.Status,
 					status: "Summarizing reasoning...",
 				};
-				const summary = yield* generateFromDefaultEndpoint({
-					messages: [
-						{
-							from: "user",
-							content: `Question: ${
-								messages[messages.length - 1].content
-							}\n\nReasoning: ${reasoningBuffer}`,
-						},
-					],
-					preprompt: `Your task is to summarize concisely all your reasoning steps and then give the final answer. Keep it short, one short paragraph at most. If the reasoning steps explicitly include a code solution, make sure to include it in your answer.
+				try {
+					const summary = yield* generateFromDefaultEndpoint({
+						messages: [
+							{
+								from: "user",
+								content: `Question: ${
+									messages[messages.length - 1].content
+								}\n\nReasoning: ${reasoningBuffer}`,
+							},
+						],
+						preprompt: `Your task is to summarize concisely all your reasoning steps and then give the final answer. Keep it short, one short paragraph at most. If the reasoning steps explicitly include a code solution, make sure to include it in your answer.
 
 If the user is just having a casual conversation that doesn't require explanations, answer directly without explaining your steps, otherwise make sure to summarize step by step, make sure to skip dead-ends in your reasoning and removing excess detail.
 
 Do not use prefixes such as Response: or Answer: when answering to the user.`,
-					generateSettings: {
-						max_new_tokens: 1024,
-					},
-				});
-				finalAnswer = summary;
-				yield {
-					type: MessageUpdateType.Reasoning,
-					subtype: MessageReasoningUpdateType.Status,
-					status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
-				};
+						generateSettings: {
+							max_new_tokens: 1024,
+						},
+					});
+					finalAnswer = summary;
+					yield {
+						type: MessageUpdateType.Reasoning,
+						subtype: MessageReasoningUpdateType.Status,
+						status: `Done in ${Math.round((new Date().getTime() - startTime.getTime()) / 1000)}s.`,
+					};
+				} catch (e) {
+					finalAnswer = text;
+					logger.error(e);
+				}
 			}
 
 			yield {
@@ -143,9 +149,13 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
 			// create a new status every 5 seconds
 			if (new Date().getTime() - lastReasoningUpdate.getTime() > 4000) {
 				lastReasoningUpdate = new Date();
-				generateSummaryOfReasoning(reasoningBuffer).then((summary) => {
-					status = summary;
-				});
+				try {
+					generateSummaryOfReasoning(reasoningBuffer).then((summary) => {
+						status = summary;
+					});
+				} catch (e) {
+					logger.error(e);
+				}
 			}
 			yield {
 				type: MessageUpdateType.Reasoning,
diff --git a/src/lib/server/textGeneration/reasoning.ts b/src/lib/server/textGeneration/reasoning.ts
@@ -1,6 +1,7 @@
 import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";
 
 import { getReturnFromGenerator } from "$lib/utils/getReturnFromGenerator";
+import { logger } from "../logger";
 
 export async function generateSummaryOfReasoning(buffer: string): Promise<string> {
 	// debug 5s delay
@@ -21,10 +22,15 @@ export async function generateSummaryOfReasoning(buffer: string): Promise<string
 				max_new_tokens: 50,
 			},
 		})
-	).then((summary) => {
-		const parts = summary.split("...");
-		return parts[0] + "...";
-	});
+	)
+		.then((summary) => {
+			const parts = summary.split("...");
+			return parts[0] + "...";
+		})
+		.catch((e) => {
+			logger.error(e);
+			return "Reasoning...";
+		});
 
 	return summary;
 }