Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion server/db/knowledgeBase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ export const getAllCollectionAndFolderItems = async (
)
if (!col) continue
const roots = await trx
.select({ id: collectionItems.id })
.select({ id: collectionItems.id, type: collectionItems.type })
.from(collectionItems)
.where(
and(
Expand All @@ -618,6 +618,10 @@ export const getAllCollectionAndFolderItems = async (
),
)
roots.forEach((r) => queue.push({ itemId: r.id }))
roots.forEach((r) => {
if (r.type == "folder") folderIds.push(r.id)
else if (r.type == "file") fileIds.push(r.id)
})
} else if (input.startsWith("clfd-")) {
// Folder vespa docId -> resolve to item id
const [folder] = await trx
Expand Down
88 changes: 74 additions & 14 deletions server/search/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ import {
getAllCollectionAndFolderItems,
getCollectionFoldersItemIds,
} from "@/db/knowledgeBase"
import { collections } from "@/db/schema"
import type { SelectAgent } from "@/db/agent"
import { sharedVespaService } from "./vespaService"
import { and, inArray, isNull } from "drizzle-orm"

const Logger = getLogger(Subsystem.Vespa).child({ module: "search-utils" })

Expand All @@ -41,15 +43,18 @@ export function expandSheetIds(fileId: string): string[] {
return expandedIds
}

/**
 * Swaps the trailing sheet index of a sheet-style Vespa document id.
 *
 * An id shaped like `<docId>_sheet_<digits>` is returned as
 * `<docId>_sheet_<newSheetIndex>`; any id that does not end in that suffix
 * (or has nothing in front of it) is returned untouched.
 *
 * @param vespaDocId - The Vespa document id to rewrite.
 * @param newSheetIndex - The sheet index to place in the suffix.
 * @returns The id with its sheet index replaced, or the original id when it
 * is not a sheet id.
 */
export function replaceSheetIndex(
  vespaDocId: string,
  newSheetIndex: number,
): string {
  // Check if the vespaDocId matches the pattern docId_sheet_number.
  // The greedy prefix means only the LAST `_sheet_<n>` suffix is replaced.
  const sheetMatch = vespaDocId.match(/^(.+)_sheet_(\d+)$/)

  if (!sheetMatch) {
    // Not a sheet ID, return as is
    return vespaDocId
  }

  const [, docId] = sheetMatch
  return `${docId}_sheet_${newSheetIndex}`
}
Expand All @@ -68,37 +73,92 @@ export function removePrefixesFromItemIds(itemIds: string[]): string[] {
})
}

/**
 * Resolves collection database ids to their Vespa document ids.
 *
 * Only rows of `collections` whose `id` is in `collectionDbIds` and whose
 * `deletedAt` is null are considered; falsy `vespaDocId` values are dropped
 * from the result. An empty input returns an empty array without querying.
 *
 * @param collectionDbIds - Database ids of the collections to look up.
 * @returns The matching Vespa document ids, or an empty array when the input
 * is empty or the lookup throws (the error is logged, not rethrown).
 */
export async function getCollectionVespaIds(
  collectionDbIds: string[],
): Promise<string[]> {
  try {
    // Nothing to look up: skip the query entirely.
    if (!collectionDbIds.length) {
      return []
    }

    const baseQuery = db
      .select({ vespaDocId: collections.vespaDocId })
      .from(collections)

    // Restrict to the requested collections that have not been soft-deleted.
    const rows = await baseQuery.where(
      and(
        inArray(collections.id, collectionDbIds),
        isNull(collections.deletedAt),
      ),
    )

    const vespaDocIds = rows.map(({ vespaDocId }) => vespaDocId)
    return vespaDocIds.filter(Boolean)
  } catch (error) {
    // Best-effort lookup: report the failure and fall back to no ids.
    Logger.error("Error getting collection vespaIds:", error)
    return []
  }
}

export async function getVespaIdsFromPrefixedItemIds(
prefixedItemIds: string[],
): Promise<string[]> {
try {
// Remove prefixes from itemIds
const cleanedItemIds = removePrefixesFromItemIds(prefixedItemIds)
// Get their corresponding vespaIds
const ids = await getCollectionFoldersItemIds(cleanedItemIds, db)
// get all their children db Ids
const { fileIds, folderIds } = await getAllCollectionAndFolderItems(
ids
// Separate itemIds by type based on their prefixes
const collectionIds: string[] = []
const folderFileIds: string[] = []

for (const itemId of prefixedItemIds) {
if (itemId.startsWith("cl-")) {
// Collection ID - remove 'cl-' prefix
collectionIds.push(itemId.substring(3))
} else if (itemId.startsWith("clfd-") || itemId.startsWith("clf-")) {
// Folder or file ID - will be cleaned by removePrefixesFromItemIds
folderFileIds.push(itemId)
} else {
Logger.error("Invalid collection item")
}
}

const allVespaDocIds: string[] = []

// Handle collection IDs
if (collectionIds.length > 0) {
const collectionVespaIds = await getCollectionVespaIds(collectionIds)
allVespaDocIds.push(...collectionVespaIds)
}

// Handle folder/file IDs
if (folderFileIds.length > 0) {
const cleanedFolderFileIds = removePrefixesFromItemIds(folderFileIds)
const ids = await getCollectionFoldersItemIds(cleanedFolderFileIds, db)
const folderFileVespaIds = ids
.map((doc) => doc.vespaDocId)
.filter((id): id is string => id !== null),
.filter((id): id is string => id !== null)
allVespaDocIds.push(...folderFileVespaIds)
}

// Get all their children db Ids using the combined vespa doc IDs
const { fileIds, folderIds } = await getAllCollectionAndFolderItems(
allVespaDocIds,
db,
)

// Start with the original collection vespa doc IDs
const finalVespaIds = [...allVespaDocIds]

// Get vespaIds for all file items
const fileVespaIds = await getCollectionFilesVespaIds(fileIds, db)
const allVespaIds = fileVespaIds
const fileVespaDocIds = fileVespaIds
.map((item: any) => item.vespaDocId)
.filter(Boolean)
finalVespaIds.push(...fileVespaDocIds)

// Also get vespaIds for folder items
if (folderIds.length > 0) {
const folderVespaIds = await getCollectionFoldersItemIds(folderIds, db)
const folderVespaDocIds = folderVespaIds
.map((item: any) => item.vespaDocId)
.filter(Boolean)
allVespaIds.push(...folderVespaDocIds)
finalVespaIds.push(...folderVespaDocIds)
}
return allVespaIds
return finalVespaIds
} catch (error) {
Logger.error("Error getting vespaIds from prefixed itemIds:", error)
return []
Expand Down