Commit 9324e6c

refactor: Minor updates
1 parent: d2c77d6 · commit: 9324e6c

5 files changed: 158 additions, 72 deletions

config/env/.env.template

Lines changed: 4 additions & 0 deletions
@@ -13,6 +13,10 @@ RAG_QUERY_API_KEY="RAG_QUERY_API_KEY"
 # RAG Management API Key (protects management endpoints)
 RAG_MANAGEMENT_API_KEY="RAG_MANAGEMENT_API_KEY"
 
+# Number of last messages from conversation history to consider for RAG query context.
+# Set to 0 to use all messages. If not set, defaults to 1 (or as defined in config.ts).
+RAG_CONVERSATION_WINDOW_SIZE=1
+
 # Gemini Configuration
 GEMINI_BASE_URL="https://generativelanguage.googleapis.com/v1beta"
 GEMINI_API_KEY="GOOGLE_GEMINI_API_KEY"

src/config.ts

Lines changed: 2 additions & 0 deletions
@@ -43,4 +43,6 @@ export const config = {
   ragManagementApiKey: process.env.RAG_MANAGEMENT_API_KEY!,
 
   ragQueryApiKey: process.env.RAG_QUERY_API_KEY!,
+
+  ragConversationWindowSize: parseInt(process.env.RAG_CONVERSATION_WINDOW_SIZE || '', 10) || 1,
 };
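For reference, a minimal sketch of how this fallback chain resolves typical values of the environment variable; the helper name resolveWindowSize is hypothetical and not part of the commit:

// Hypothetical helper illustrating the fallback chain above (not part of the commit).
function resolveWindowSize(raw: string | undefined): number {
  // parseInt('', 10) yields NaN, which is falsy, so an unset variable falls back to 1.
  return parseInt(raw || '', 10) || 1;
}

resolveWindowSize(undefined); // 1 (variable unset)
resolveWindowSize('3');       // 3 (consider the last 3 messages)
resolveWindowSize('0');       // 1 (0 is falsy, so the `|| 1` default applies here as well)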

src/controllers/geminiQuery.ts

Lines changed: 143 additions & 61 deletions
@@ -4,50 +4,81 @@ import { config } from '../config';
 import Logger from '../logger';
 import { generateText } from '../services/llmWrapper';
 import { initializedRagService } from '../services/ragService';
-import { GeminiQueryRequest, LLMChatResponse } from '../types';
+import { GeminiContent, GeminiQueryRequest, LLMChatResponse } from '../types';
 
 export const handleGeminiBatch = async (req: Request, res: Response) => {
   const model = req.params.model;
-  let userQuery = ''; // Define userQuery here to be accessible in the catch block
+  let userQueryForRAG = ''; // For RAG and general query representation
 
   try {
     const { contents } = req.body as GeminiQueryRequest;
 
-    // Validate contents structure
+    // Initial structural validation (can be enhanced to check all items)
     if (
       !contents ||
       !Array.isArray(contents) ||
       contents.length === 0 ||
-      !contents[0].parts ||
-      !Array.isArray(contents[0].parts) ||
-      contents[0].parts.length === 0 ||
-      !contents[0].parts[0].text ||
-      typeof contents[0].parts[0].text !== 'string' ||
-      contents[0].parts[0].text.trim() === ''
+      !contents.every(
+        (item) =>
+          item.parts &&
+          Array.isArray(item.parts) &&
+          item.parts.length > 0 &&
+          item.parts.every((part) => part.text && typeof part.text === 'string')
+      )
     ) {
       return res.status(400).json({
         error:
-          'Bad Request: contents is required and must be an array with at least one part containing a non-empty text string.',
+          'Bad Request: contents is required and must be an array of content items, each with at least one part containing a non-empty text string.',
       });
     }
-    userQuery = contents[0].parts[0].text; // Assign userQuery after validation
+
+    // Create userQueryForRAG from all parts of all content items
+    if (contents && Array.isArray(contents)) {
+      let messagesToConsider = contents;
+      const windowSize = config.ragConversationWindowSize;
+
+      if (windowSize && windowSize > 0 && windowSize < contents.length) {
+        messagesToConsider = contents.slice(-windowSize); // Get the last N messages
+        Logger.info(`RAG windowing: Using last ${windowSize} of ${contents.length} messages for RAG query.`);
+      } else if (windowSize && windowSize > 0 && windowSize >= contents.length) {
+        Logger.info(
+          `RAG windowing: Window size ${windowSize} is >= total messages ${contents.length}. Using all messages for RAG query.`
+        );
+        // messagesToConsider remains 'contents'
+      } else {
+        // windowSize is 0 or not set
+        Logger.info(`RAG windowing: Window size is 0 or not set. Using all ${contents.length} messages for RAG query.`);
+        // messagesToConsider remains 'contents'
+      }
+
+      userQueryForRAG = messagesToConsider
+        .flatMap((contentItem) => contentItem.parts.map((part) => part.text))
+        .join('\n');
+    }
+    // userQueryForErrorLogging removed
+
+    // Validate that the consolidated query is not empty
+    if (userQueryForRAG.trim() === '') {
+      return res.status(400).json({ error: 'Bad Request: Consolidated text from contents is empty.' });
+    }
 
     const rag_type = config.geminiRagType;
     const numberOfResults = config.geminiNResults;
 
     Logger.info(
-      `INFO: Gemini Batch Request. Model: ${model}. RAG Type (from config): ${rag_type}. N Results (from config): ${numberOfResults}.`
+      `INFO: Gemini Batch Request. Model: ${model}. RAG Type (from config): ${rag_type}. N Results (from config): ${numberOfResults}. Consolidated user query for RAG (first 100 chars): "${userQueryForRAG.substring(0, 100)}..."`
     );
 
     const ragService = await initializedRagService;
-    const chunks = await ragService.queryChunks(userQuery, numberOfResults);
+    const chunks = await ragService.queryChunks(userQueryForRAG, numberOfResults);
+
+    let contentsForLlm = JSON.parse(JSON.stringify(contents)) as GeminiContent[]; // Deep copy
 
-    let augmentedPrompt: string;
     if (!chunks || chunks.length === 0) {
-      console.warn(
-        `No relevant chunks found for query: "${userQuery}" with model ${model}. Querying LLM directly without RAG context.`
+      Logger.warn(
+        `No relevant chunks found for query (first 100 chars): "${userQueryForRAG.substring(0, 100)}..." with model ${model}. Querying LLM directly without RAG context.`
      );
-      augmentedPrompt = userQuery;
+      // contentsForLlm remains as is (original user contents)
     } else {
       let contextContent: string[] = [];
       if (rag_type === 'advanced') {
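The hunk above replaces the old single-message userQuery with a consolidated userQueryForRAG built from a sliding window over the conversation history. A standalone sketch of that windowing logic, with a simplified GeminiContent shape standing in for the type imported from '../types':

// Standalone sketch of the windowing above; the GeminiContent shape is assumed, not copied from '../types'.
interface GeminiPart { text: string; }
interface GeminiContent { role?: string; parts: GeminiPart[]; }

function buildRagQuery(contents: GeminiContent[], windowSize: number): string {
  // A positive window smaller than the history keeps only the last N messages;
  // 0 (or an invalid value) keeps the whole conversation.
  const messagesToConsider =
    windowSize > 0 && windowSize < contents.length ? contents.slice(-windowSize) : contents;
  return messagesToConsider.flatMap((c) => c.parts.map((p) => p.text)).join('\n');
}

// With RAG_CONVERSATION_WINDOW_SIZE=1, only the latest turn reaches the vector store:
buildRagQuery(
  [
    { role: 'user', parts: [{ text: 'How does the batch endpoint work?' }] },
    { role: 'model', parts: [{ text: 'It augments the prompt with retrieved chunks.' }] },
    { role: 'user', parts: [{ text: 'What about streaming?' }] },
  ],
  1
); // => 'What about streaming?'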
@@ -72,24 +103,33 @@ export const handleGeminiBatch = async (req: Request, res: Response) => {
       }
 
       if (contextContent.length === 0) {
-        console.warn(
-          `Chunks were found for query "${userQuery}" (RAG Type from config: ${rag_type}, model: ${model}), but no relevant content could be extracted. Querying LLM directly.`
+        Logger.warn(
+          `Chunks were found for query (first 100 chars): "${userQueryForRAG.substring(0, 100)}..." (RAG Type from config: ${rag_type}, model: ${model}), but no relevant content could be extracted. Querying LLM directly.`
         );
-        augmentedPrompt = userQuery;
+        // contentsForLlm remains as is
       } else {
         const context = contextContent.join('\n---\n');
         const contextDescription =
           rag_type === 'advanced' ? 'Relevant Information from Parent Documents' : 'Relevant Text Chunks';
-        augmentedPrompt = `User Query: ${userQuery}\n\n${contextDescription}:\n---\n${context}\n---\nBased on the relevant information above, answer the user query.`;
+        const ragAugmentationPrefix = `Based on the relevant information below, answer the user query.\n${contextDescription}:\n---\n${context}\n---\nConsidering the above context and the conversation history, here is the latest user message: `;
+
+        const lastContentItem = contentsForLlm[contentsForLlm.length - 1];
+        if (lastContentItem && lastContentItem.parts && lastContentItem.parts.length > 0) {
+          lastContentItem.parts[0].text = ragAugmentationPrefix + lastContentItem.parts[0].text;
+        } else {
+          Logger.warn(
+            'Last content item for RAG augmentation is malformed or missing parts. RAG context might not be prepended as expected.'
+          );
+          // Fallback: if the last message is weird, but we have context, maybe put context in its own message?
+          // For now, the original plan is to modify the last message. If it's malformed, it won't be modified.
+        }
       }
     }
 
-    // Logger.info(`INFO: Gemini Batch Request. Model: ${model}. RAG Type: ${rag_type}.`); // Already logged above with more details
-
     try {
       const llmResponse = (await generateText({
         model: model,
-        query: augmentedPrompt,
+        contents: contentsForLlm, // Pass the (potentially RAG-augmented) GeminiContent[]
         stream: false,
       })) as LLMChatResponse;
       res.status(200).json(llmResponse);
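Rather than building one augmentedPrompt string, the retrieved context is now prepended to the first part of the latest message, leaving earlier turns in contentsForLlm untouched. An illustration with made-up chunk text and user message (not taken from the commit):

// Illustrative values only; the chunk text and user message below are invented.
const contextDescription = 'Relevant Text Chunks'; // label used for the non-'advanced' rag_type
const context = ['Chunk A text', 'Chunk B text'].join('\n---\n');
const ragAugmentationPrefix = `Based on the relevant information below, answer the user query.\n${contextDescription}:\n---\n${context}\n---\nConsidering the above context and the conversation history, here is the latest user message: `;

// If the latest message part was 'What about streaming?', its text becomes:
//   Based on the relevant information below, answer the user query.
//   Relevant Text Chunks:
//   ---
//   Chunk A text
//   ---
//   Chunk B text
//   ---
//   Considering the above context and the conversation history, here is the latest user message: What about streaming?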
@@ -100,7 +140,10 @@ export const handleGeminiBatch = async (req: Request, res: Response) => {
         .json({ details: llmError.message, error: `Failed to get response from LLM provider Gemini.` });
     }
   } catch (error: any) {
-    Logger.error(`Error in handleGeminiBatch for model ${model}, query "${userQuery}":`, error); // Use userQuery for logging
+    Logger.error(
+      `Error in handleGeminiBatch for model ${model}, consolidated query (first 100 chars): "${userQueryForRAG.substring(0, 100)}":`,
+      error
+    );
     if (error.message && error.message.includes('ChromaDB collection is not initialized')) {
       return res.status(503).json({ error: 'Service Unavailable: RAG service is not ready.' });
     }
@@ -113,65 +156,101 @@ export const handleGeminiBatch = async (req: Request, res: Response) => {
 
 export const handleGeminiStream = async (req: Request, res: Response) => {
   const model = req.params.model;
-  let userQuery = ''; // Define userQuery here to be accessible in the catch block
+  let userQueryForRAG = ''; // For RAG and general query representation
 
   try {
     const { contents } = req.body as GeminiQueryRequest;
 
     res.setHeader('Content-Type', 'text/event-stream');
     res.setHeader('Cache-Control', 'no-cache');
     res.setHeader('Connection', 'keep-alive');
-    // res.flushHeaders(); // Flush headers after initial validation
 
-    // Validate contents structure
+    // Initial structural validation (can be enhanced to check all items)
     if (
       !contents ||
       !Array.isArray(contents) ||
       contents.length === 0 ||
-      !contents[0].parts ||
-      !Array.isArray(contents[0].parts) ||
-      contents[0].parts.length === 0 ||
-      !contents[0].parts[0].text ||
-      typeof contents[0].parts[0].text !== 'string' ||
-      contents[0].parts[0].text.trim() === ''
+      !contents.every(
+        (item) =>
+          item.parts &&
+          Array.isArray(item.parts) &&
+          item.parts.length > 0 &&
+          item.parts.every((part) => part.text && typeof part.text === 'string')
+      )
     ) {
-      // If headers not sent, can send 400
       if (!res.headersSent) {
        return res.status(400).json({
          error:
-            'Bad Request: contents is required and must be an array with at least one part containing a non-empty text string.',
+            'Bad Request: contents is required and must be an array of content items, each with at least one part containing a non-empty text string.',
        });
      } else {
-        // Headers sent, write error to stream
        res.write(
          `data: ${JSON.stringify({
            error:
-              'Bad Request: contents is required and must be an array with at least one part containing a non-empty text string.',
+              'Bad Request: contents is required and must be an array of content items, each with at least one part containing a non-empty text string.',
          })}\n\n`
        );
        res.end();
        return;
      }
    }
-    userQuery = contents[0].parts[0].text; // Assign userQuery after validation
-    res.flushHeaders(); // Send headers now that initial validation passed
+
+    // Create userQueryForRAG from all parts of all content items
+    if (contents && Array.isArray(contents)) {
+      let messagesToConsider = contents;
+      const windowSize = config.ragConversationWindowSize;
+
+      if (windowSize && windowSize > 0 && windowSize < contents.length) {
+        messagesToConsider = contents.slice(-windowSize); // Get the last N messages
+        Logger.info(`RAG windowing: Using last ${windowSize} of ${contents.length} messages for RAG query (stream).`);
+      } else if (windowSize && windowSize > 0 && windowSize >= contents.length) {
+        Logger.info(
+          `RAG windowing: Window size ${windowSize} is >= total messages ${contents.length}. Using all messages for RAG query (stream).`
+        );
+        // messagesToConsider remains 'contents'
+      } else {
+        // windowSize is 0 or not set
+        Logger.info(
+          `RAG windowing: Window size is 0 or not set. Using all ${contents.length} messages for RAG query (stream).`
+        );
+        // messagesToConsider remains 'contents'
+      }
+
+      userQueryForRAG = messagesToConsider
+        .flatMap((contentItem) => contentItem.parts.map((part) => part.text))
+        .join('\n');
+    }
+    // userQueryForErrorLogging removed
+
+    // Validate that the consolidated query is not empty
+    if (userQueryForRAG.trim() === '') {
+      if (!res.headersSent) {
+        return res.status(400).json({ error: 'Bad Request: Consolidated text from contents is empty.' });
+      } else {
+        res.write(`data: ${JSON.stringify({ error: 'Bad Request: Consolidated text from contents is empty.' })}\n\n`);
+        res.end();
+        return;
+      }
+    }
+    res.flushHeaders(); // Send headers now that validation passed
 
     const rag_type = config.geminiRagType;
     const numberOfResults = config.geminiNResults;
 
     Logger.info(
-      `INFO: Gemini Stream Request. Model: ${model}. RAG Type (from config): ${rag_type}. N Results (from config): ${numberOfResults}.`
+      `INFO: Gemini Stream Request. Model: ${model}. RAG Type (from config): ${rag_type}. N Results (from config): ${numberOfResults}. Consolidated user query for RAG (first 100 chars): "${userQueryForRAG.substring(0, 100)}..."`
     );
 
     const ragService = await initializedRagService;
-    const chunks = await ragService.queryChunks(userQuery, numberOfResults);
+    const chunks = await ragService.queryChunks(userQueryForRAG, numberOfResults);
+
+    let contentsForLlm = JSON.parse(JSON.stringify(contents)) as GeminiContent[]; // Deep copy
 
-    let augmentedPrompt: string;
     if (!chunks || chunks.length === 0) {
-      console.warn(
-        `No relevant chunks found for query: "${userQuery}" with model ${model} (stream). Querying LLM directly without RAG context.`
+      Logger.warn(
+        `No relevant chunks found for query (first 100 chars): "${userQueryForRAG.substring(0, 100)}..." with model ${model} (stream). Querying LLM directly without RAG context.`
      );
-      augmentedPrompt = userQuery;
+      // contentsForLlm remains as is
     } else {
       let contextContent: string[] = [];
       if (rag_type === 'advanced') {
@@ -196,47 +275,52 @@ export const handleGeminiStream = async (req: Request, res: Response) => {
       }
 
       if (contextContent.length === 0) {
-        console.warn(
-          `Chunks were found for query "${userQuery}" (RAG Type from config: ${rag_type}, model: ${model}, stream), but no relevant content could be extracted. Querying LLM directly.`
+        Logger.warn(
+          `Chunks were found for query (first 100 chars): "${userQueryForRAG.substring(0, 100)}..." (RAG Type from config: ${rag_type}, model: ${model}, stream), but no relevant content could be extracted. Querying LLM directly.`
         );
-        augmentedPrompt = userQuery;
+        // contentsForLlm remains as is
       } else {
         const context = contextContent.join('\n---\n');
         const contextDescription =
           rag_type === 'advanced' ? 'Relevant Information from Parent Documents' : 'Relevant Text Chunks';
-        augmentedPrompt = `User Query: ${userQuery}\n\n${contextDescription}:\n---\n${context}\n---\nBased on the relevant information above, answer the user query.`;
+        const ragAugmentationPrefix = `Based on the relevant information below, answer the user query.\n${contextDescription}:\n---\n${context}\n---\nConsidering the above context and the conversation history, here is the latest user message: `;
+
+        const lastContentItem = contentsForLlm[contentsForLlm.length - 1];
+        if (lastContentItem && lastContentItem.parts && lastContentItem.parts.length > 0) {
+          lastContentItem.parts[0].text = ragAugmentationPrefix + lastContentItem.parts[0].text;
+        } else {
+          Logger.warn(
+            'Last content item for RAG augmentation is malformed or missing parts (stream). RAG context might not be prepended as expected.'
+          );
+        }
       }
     }
 
-    // Logger.info(`INFO: Gemini Stream Request. Model: ${model}. RAG Type: ${rag_type}.`); // Already logged above
-
     try {
       await generateText({
         model: model,
-        // The onChunk callback now receives a raw SSE line string from llmWrapper.ts
         onChunk: (rawSseLine: string) => {
-          // Pass the raw SSE line, followed by a single newline, as per SSE spec.
           res.write(`${rawSseLine}\n`);
         },
-        query: augmentedPrompt,
+        contents: contentsForLlm, // Pass the (potentially RAG-augmented) GeminiContent[]
         stream: true,
       });
       res.end();
     } catch (llmError: any) {
       Logger.error(`Error calling llmWrapper for Gemini stream (model: ${model}):`, llmError);
       if (!res.writableEnded) {
-        // Check if stream is still open
         res.write(
           `data: ${JSON.stringify({ details: llmError.message, error: `Failed to get response from LLM provider Gemini.` })}\n\n`
         );
         res.end();
       }
     }
   } catch (error: any) {
-    Logger.error(`Error in handleGeminiStream for model ${model}, query "${userQuery}":`, error); // Use userQuery for logging
+    Logger.error(
+      `Error in handleGeminiStream for model ${model}, consolidated query (first 100 chars): "${userQueryForRAG.substring(0, 100)}":`,
+      error
+    );
     if (!res.headersSent) {
-      // This case should ideally not be reached if query validation is first.
-      // However, for other early errors (like RAG service init), this is a fallback.
       if (error.message && error.message.includes('ChromaDB collection is not initialized')) {
         res.status(503).json({ error: 'Service Unavailable: RAG service is not ready.' });
         return;
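On the streaming path the controller stays a pass-through: llmWrapper is assumed to emit complete SSE data: lines, and each one is forwarded verbatim with a trailing newline. A minimal sketch of that callback wiring; the payload shown is invented, not Gemini's actual wire format:

import type { Response } from 'express';

// Hypothetical illustration of the onChunk pass-through above; the sample payload is made up.
function makeOnChunk(res: Response): (rawSseLine: string) => void {
  return (rawSseLine) => {
    // No parsing or buffering in the controller: the raw line from llmWrapper is written
    // as-is, followed by a single newline.
    res.write(`${rawSseLine}\n`);
  };
}

// makeOnChunk(res)('data: {"text":"partial answer"}') writes: data: {"text":"partial answer"}\n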
@@ -247,7 +331,6 @@ export const handleGeminiStream = async (req: Request, res: Response) => {
       }
       res.status(500).json({ details: error.message, error: 'Internal Server Error' });
     } else if (!res.writableEnded) {
-      // Headers sent, stream is open, write error to stream
       let errorMessage = 'Internal Server Error';
       if (error.message && error.message.includes('ChromaDB collection is not initialized')) {
         errorMessage = 'Service Unavailable: RAG service is not ready.';
@@ -257,6 +340,5 @@ export const handleGeminiStream = async (req: Request, res: Response) => {
       res.write(`data: ${JSON.stringify({ details: error.message, error: errorMessage })}\n\n`);
       res.end();
     }
-    // If res.writableEnded is true, can't do anything more.
   }
 };
