Merged
Changes from 6 commits
182 changes: 179 additions & 3 deletions server/ai/context.ts
@@ -32,6 +32,152 @@ import {
getSortedScoredImageChunks,
} from "@xyne/vespa-ts/mappers"
import type { UserMetadataType } from "@/types"
import { querySheetChunks } from "@/lib/duckdb"
import { chunkSheetWithHeaders } from "@/sheetChunk"

// Utility to extract the shared header row from sheet chunks and strip it from each data chunk
const extractHeaderAndDataChunks = (
chunks_summary: (string | { chunk: string; score: number; index: number })[] | undefined,
matchfeatures?: any
): {
chunks_summary: (string | { chunk: string; score: number; index: number })[];
matchfeatures?: any;
} => {
if (!chunks_summary || chunks_summary.length === 0) {
return { chunks_summary: [], matchfeatures };
}

// Find the header from the first chunk
let headerChunk = '';
if (chunks_summary.length > 0) {
const firstChunk = typeof chunks_summary[0] === "string" ? chunks_summary[0] : chunks_summary[0].chunk;
const lines = firstChunk.split('\n');
if (lines.length > 0 && lines[0].includes('\t')) {
headerChunk = lines[0]; // Extract the header line
}
}

// Process all chunks: remove header from each and keep only data rows
const processedChunks: (string | { chunk: string; score: number; index: number })[] = [];
let newMatchfeatures = matchfeatures;

// Add the header as the first chunk if found, using the same structure as the original chunks
if (headerChunk) {
if (typeof chunks_summary[0] === "string") {
processedChunks.push(headerChunk);
} else {
processedChunks.push({
chunk: headerChunk,
score: 1,
index: 0,
});
}

// Update matchfeatures to include the header chunk score
if (newMatchfeatures) {
const existingCells = newMatchfeatures.chunk_scores?.cells || {};
const scores = Object.values(existingCells) as number[];
const maxScore = scores.length > 0 ? Math.max(...scores) : 0;
// Create new chunk_scores that match the new chunks
const newChunkScores: Record<string, number> = {}
newChunkScores["0"] = maxScore + 1
Object.entries(existingCells).forEach(([idx, score]) => {
newChunkScores[(parseInt(idx) + 1).toString()] = score as number
})

newMatchfeatures = {
...newMatchfeatures,
chunk_scores: {
cells: newChunkScores
}
};
}
}

// Process each original chunk: remove header and add data rows
for (let i = 0; i < chunks_summary.length; i++) {
const originalChunk = chunks_summary[i];
const chunkContent = typeof originalChunk === "string" ? originalChunk : originalChunk.chunk;
const lines = chunkContent.split('\n');

// Skip the first line (header) and keep only data rows
const dataRows = lines.slice(1).filter(line => line.trim().length > 0);
if (dataRows.length > 0) {
const dataContent = dataRows.join('\n');

if (typeof originalChunk === "string") {
processedChunks.push(dataContent);
} else {
processedChunks.push({
chunk: dataContent,
score: originalChunk.score,
index: originalChunk.index
});
}
}
}

return { chunks_summary: processedChunks, matchfeatures: newMatchfeatures };
};
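// Illustrative example (not part of this change): assuming tab-separated sheet chunks that
// each repeat the header row, the transformation looks roughly like this:
//   input:  ["Name\tAge\nAlice\t30", "Name\tAge\nBob\t25"]
//   output: ["Name\tAge", "Alice\t30", "Bob\t25"]
// The header becomes chunk index 0 and is scored above the existing maximum so it survives
// any later score-based truncation; all existing chunk_scores indices shift up by one.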

// Utility function to process sheet queries for spreadsheet files
const processSheetQuery = async (
chunks_summary: (string | { chunk: string; score: number; index: number })[] | undefined,
query: string,
matchfeatures: any
): Promise<{
chunks_summary: { chunk: string; score: number; index: number }[];
matchfeatures: any;
maxSummaryChunks: number;
} | null> => {
const duckDBResult = await querySheetChunks(
chunks_summary?.map((c) => typeof c === "string" ? c : c.chunk) || [],
query
)

// If the DuckDB query failed (null means the query was not metric-related or SQL generation failed), return null to fall back to the original approach
if (!duckDBResult) {
return null;
}

// Create metadata chunk with query information (excluding data)
const metadataChunk = JSON.stringify({
assumptions: duckDBResult.assumptions,
schema_fragment: duckDBResult.schema_fragment
}, null, 2)

// Use chunkSheetWithHeaders to chunk the 2D array data
const dataChunks = chunkSheetWithHeaders(duckDBResult.data.rows, {headerRows: 1})

// Combine metadata chunk with data chunks
const allChunks = [metadataChunk, ...dataChunks]

const newChunksSummary = allChunks.map((c, idx) => ({chunk: c, score: 0, index: idx}))

// Update matchfeatures to correspond to the new chunks
let newMatchfeatures = matchfeatures
if (matchfeatures) {
// Create new chunk_scores that match the new chunks
const newChunkScores: Record<string, number> = {}
allChunks.forEach((_, idx) => {
newChunkScores[idx.toString()] = 0 // All new chunks get score 0
})

// Update the matchfeatures with new chunk_scores
newMatchfeatures = {
...matchfeatures,
chunk_scores: {
cells: newChunkScores
}
}
}

return {
chunks_summary: newChunksSummary,
matchfeatures: newMatchfeatures,
maxSummaryChunks: allChunks.length
}
}
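// Illustrative sketch (assumptions only, not part of this diff): for a metric-style query such
// as "total revenue per region", the rewritten context would look roughly like
//   [ JSON metadata chunk (assumptions + schema_fragment) at index 0,
//     data chunks produced by chunkSheetWithHeaders from the DuckDB result rows, ... ]
// with every chunk scored 0, so downstream ranking treats the sheet-derived context uniformly.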

// Utility to capitalize the first letter of a string
const capitalize = (str: string) => str.charAt(0).toUpperCase() + str.slice(1)
@@ -573,7 +719,7 @@ const constructCollectionFileContext = (
isMsgWithSources?: boolean,
): string => {

if ((!maxSummaryChunks && !isSelectedFiles) || isMsgWithSources) {
if ((!maxSummaryChunks && !isSelectedFiles)) {
maxSummaryChunks = fields.chunks_summary?.length
}
let chunks: ScoredChunk[] = []
@@ -738,13 +884,43 @@ export const answerColoredContextMap = (
}

type AiContext = string
export const answerContextMap = (
export const answerContextMap = async (
searchResult: VespaSearchResults,
userMetadata: UserMetadataType,
maxSummaryChunks?: number,
isSelectedFiles?: boolean,
isMsgWithSources?: boolean,
): AiContext => {
query?: string,
): Promise<AiContext> => {
if(searchResult.fields.sddocname === fileSchema || searchResult.fields.sddocname === dataSourceFileSchema || searchResult.fields.sddocname === KbItemsSchema || searchResult.fields.sddocname === mailAttachmentSchema) {
let mimeType
if(searchResult.fields.sddocname === mailAttachmentSchema) {
mimeType = searchResult.fields.fileType
} else {
mimeType = searchResult.fields.mimeType
}
if(mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ||
mimeType === "application/vnd.ms-excel" ||
mimeType === "text/csv") {
const result = extractHeaderAndDataChunks(searchResult.fields.chunks_summary, searchResult.fields.matchfeatures);
searchResult.fields.chunks_summary = result.chunks_summary;
if (result.matchfeatures) {
searchResult.fields.matchfeatures = result.matchfeatures;
}

if (query) {
const sheetResult = await processSheetQuery(searchResult.fields.chunks_summary, query, searchResult.fields.matchfeatures)
if (sheetResult) {
const { chunks_summary, matchfeatures, maxSummaryChunks: newMaxSummaryChunks } = sheetResult
searchResult.fields.chunks_summary = chunks_summary
searchResult.fields.matchfeatures = matchfeatures
maxSummaryChunks = newMaxSummaryChunks
} else {
maxSummaryChunks = Math.min(searchResult.fields.chunks_summary?.length || 0, 100)
}
}
}
}
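// Summary of the flow above (comment added for clarity, not in the original diff): for
// spreadsheet-like MIME types the chunks are first normalized by extractHeaderAndDataChunks;
// if a query is present, processSheetQuery may replace them with DuckDB-derived chunks,
// falling back to a cap of 100 summary chunks when it cannot.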
if (searchResult.fields.sddocname === fileSchema) {
return constructFileContext(
searchResult.fields,
60 changes: 35 additions & 25 deletions server/api/chat/agents.ts
@@ -135,6 +135,7 @@ import { getModelValueFromLabel } from "@/ai/modelConfig"
import {
buildContext,
buildUserQuery,
expandSheetIds,
getThreadContext,
isContextSelected,
UnderstandMessageAndAnswer,
@@ -530,13 +531,21 @@ const checkAndYieldCitationsForAgent = async function* (
}
}

const vespaResultToMinimalAgentFragment = (
const vespaResultToMinimalAgentFragment = async (
child: VespaSearchResult,
idx: number,
userMetadata: UserMetadataType,
): MinimalAgentFragment => ({
query: string,
): Promise<MinimalAgentFragment> => ({
id: `${(child.fields as any)?.docId || `Fragment_id_${idx}`}`,
content: answerContextMap(child as VespaSearchResults, userMetadata, 0, true),
content: await answerContextMap(
child as VespaSearchResults,
userMetadata,
0,
true,
undefined,
query,
),
source: searchToCitation(child as VespaSearchResults),
confidence: 1.0,
})
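// Hypothetical usage (query string is illustrative only): the fragment builder is now async and
// threads the user query through to answerContextMap so sheet-aware context can be built, e.g.
//   const fragment = await vespaResultToMinimalAgentFragment(child, 0, userMetadata, "q3 revenue by region")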
@@ -839,15 +848,12 @@ export const MessageWithToolsApi = async (c: Context) => {
}

const attachmentMetadata = parseAttachmentMetadata(c)
const attachmentFileIds = attachmentMetadata.map(
(m: AttachmentMetadata) => m.fileId,
)
const imageAttachmentFileIds = attachmentMetadata
.filter((m) => m.isImage)
.map((m) => m.fileId)
const nonImageAttachmentFileIds = attachmentMetadata
.filter((m) => !m.isImage)
.map((m) => m.fileId)
.flatMap((m) => expandSheetIds(m.fileId))
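// Assumption based on the name (definition not shown in this diff): expandSheetIds presumably
// expands a spreadsheet attachment's fileId into one id per sheet, hence map -> flatMap here.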
let attachmentStorageError: Error | null = null

const contextExtractionSpan = initSpan.startSpan("context_extraction")
Expand All @@ -866,7 +872,7 @@ export const MessageWithToolsApi = async (c: Context) => {
extractedInfo?.totalValidFileIdsFromLinkCount
loggerWithChild({ email: email }).info(`Extracted ${fileIds} extractedInfo`)
loggerWithChild({ email: email }).info(
`Total attachment files received: ${attachmentFileIds.length}`,
`Total attachment files received: ${attachmentMetadata.length}`,
)
const hasReferencedContext = fileIds && fileIds.length > 0
contextExtractionSpan.setAttribute("file_ids_count", fileIds?.length || 0)
@@ -1521,11 +1527,13 @@ export const MessageWithToolsApi = async (c: Context) => {
if (results?.root?.children && results.root.children.length > 0) {
const contextPromises = results?.root?.children?.map(
async (v, i) => {
let content = answerContextMap(
let content = await answerContextMap(
v as VespaSearchResults,
userMetadata,
0,
true,
undefined,
message,
)
const chatContainerFields =
isChatContainerFields(v.fields) &&
@@ -1593,22 +1601,24 @@ export const MessageWithToolsApi = async (c: Context) => {
planningContext += "\n" + buildContext(threadContexts, 10, userMetadata)
}

gatheredFragments = results.root.children.map(
(child: VespaSearchResult, idx) =>
vespaResultToMinimalAgentFragment(child, idx, userMetadata),
gatheredFragments = await Promise.all(
results.root.children.map(
async (child: VespaSearchResult, idx) =>
await vespaResultToMinimalAgentFragment(child, idx, userMetadata, message),
)
)
if (chatContexts.length > 0) {
gatheredFragments.push(
...chatContexts.map((child, idx) =>
vespaResultToMinimalAgentFragment(child, idx, userMetadata),
),
...(await Promise.all(chatContexts.map(async (child, idx) =>
await vespaResultToMinimalAgentFragment(child, idx, userMetadata, message),
))),
)
}
if (threadContexts.length > 0) {
gatheredFragments.push(
...threadContexts.map((child, idx) =>
vespaResultToMinimalAgentFragment(child, idx, userMetadata),
),
...(await Promise.all(threadContexts.map(async (child, idx) =>
await vespaResultToMinimalAgentFragment(child, idx, userMetadata, message),
))),
)
}
const parseSynthesisOutput = await performSynthesis(
Expand Down Expand Up @@ -2881,9 +2891,9 @@ export const AgentMessageApiRagOff = async (c: Context) => {
chunksSpan.end()
if (allChunks?.root?.children) {
const startIndex = 0
fragments = allChunks.root.children.map((child, idx) =>
vespaResultToMinimalAgentFragment(child, idx, userMetadata),
)
fragments = await Promise.all(allChunks.root.children.map(async (child, idx) =>
await vespaResultToMinimalAgentFragment(child, idx, userMetadata, message)
))
context = answerContextMapFromFragments(
fragments,
maxDefaultSummary,
@@ -3149,9 +3159,9 @@ export const AgentMessageApiRagOff = async (c: Context) => {
if (docIds.length > 0) {
const allChunks = await GetDocumentsByDocIds(docIds, chunksSpan)
if (allChunks?.root?.children) {
fragments = allChunks.root.children.map((child, idx) =>
vespaResultToMinimalAgentFragment(child, idx, userMetadata),
)
fragments = await Promise.all(allChunks.root.children.map(async (child, idx) =>
await vespaResultToMinimalAgentFragment(child, idx, userMetadata, message),
))
context = answerContextMapFromFragments(
fragments,
maxDefaultSummary,
@@ -3365,7 +3375,7 @@ export const AgentMessageApi = async (c: Context) => {
.map((m) => m.fileId)
const nonImageAttachmentFileIds = attachmentMetadata
.filter((m) => !m.isImage)
.map((m) => m.fileId)
.flatMap((m) => expandSheetIds(m.fileId))
let attachmentStorageError: Error | null = null
let {
message,