Skip to content

Commit 47913f8

Browse files
authored
fix(kb): fix mistral parse and kb uploads, include userId in internal auth (#1767)
* fix(kb): fix Mistral parse and KB uploads, include userId in internal auth
* update updated_at for the knowledge base when adding a new doc via the knowledge block
* update tests
1 parent 48f520b commit 47913f8

File tree

8 files changed

+146
-42
lines changed

8 files changed

+146
-42
lines changed

apps/sim/app/api/knowledge/utils.test.ts

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,23 @@ vi.mock('@sim/db', () => {
117117
return {
118118
db: {
119119
select: vi.fn(() => selectBuilder),
120-
update: () => ({
121-
set: () => ({
122-
where: () => Promise.resolve(),
120+
update: (table: any) => ({
121+
set: (payload: any) => ({
122+
where: () => {
123+
const tableSymbols = Object.getOwnPropertySymbols(table || {})
124+
const baseNameSymbol = tableSymbols.find((s) => s.toString().includes('BaseName'))
125+
const tableName = baseNameSymbol ? table[baseNameSymbol] : ''
126+
if (tableName === 'knowledge_base') {
127+
dbOps.order.push('updateKb')
128+
dbOps.updatePayloads.push(payload)
129+
} else if (tableName === 'document') {
130+
if (payload.processingStatus !== 'processing') {
131+
dbOps.order.push('updateDoc')
132+
dbOps.updatePayloads.push(payload)
133+
}
134+
}
135+
return Promise.resolve()
136+
},
123137
}),
124138
}),
125139
transaction: vi.fn(async (fn: any) => {
@@ -131,11 +145,11 @@ vi.mock('@sim/db', () => {
131145
return Promise.resolve()
132146
},
133147
}),
134-
update: () => ({
148+
update: (table: any) => ({
135149
set: (payload: any) => ({
136150
where: () => {
137151
dbOps.updatePayloads.push(payload)
138-
const label = dbOps.updatePayloads.length === 1 ? 'updateDoc' : 'updateKb'
152+
const label = payload.processingStatus !== undefined ? 'updateDoc' : 'updateKb'
139153
dbOps.order.push(label)
140154
return Promise.resolve()
141155
},
@@ -169,6 +183,9 @@ describe('Knowledge Utils', () => {
169183

170184
describe('processDocumentAsync', () => {
171185
it.concurrent('should insert embeddings before updating document counters', async () => {
186+
kbRows.push({ id: 'kb1', userId: 'user1', workspaceId: null })
187+
docRows.push({ id: 'doc1', knowledgeBaseId: 'kb1' })
188+
172189
await processDocumentAsync(
173190
'kb1',
174191
'doc1',

apps/sim/app/api/workflows/[id]/deployed/route.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ export async function GET(request: NextRequest, { params }: { params: Promise<{
2929

3030
if (authHeader?.startsWith('Bearer ')) {
3131
const token = authHeader.split(' ')[1]
32-
isInternalCall = await verifyInternalToken(token)
32+
const verification = await verifyInternalToken(token)
33+
isInternalCall = verification.valid
3334
}
3435

3536
if (!isInternalCall) {

apps/sim/app/api/workflows/[id]/route.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ export async function GET(request: NextRequest, { params }: { params: Promise<{
3737

3838
if (authHeader?.startsWith('Bearer ')) {
3939
const token = authHeader.split(' ')[1]
40-
isInternalCall = await verifyInternalToken(token)
40+
const verification = await verifyInternalToken(token)
41+
isInternalCall = verification.valid
4142
}
4243

4344
let userId: string | null = null

apps/sim/lib/auth/hybrid.ts

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,39 +33,42 @@ export async function checkHybridAuth(
3333
const authHeader = request.headers.get('authorization')
3434
if (authHeader?.startsWith('Bearer ')) {
3535
const token = authHeader.split(' ')[1]
36-
const isInternalCall = await verifyInternalToken(token)
36+
const verification = await verifyInternalToken(token)
3737

38-
if (isInternalCall) {
39-
// For internal calls, we need workflowId to determine user context
38+
if (verification.valid) {
4039
let workflowId: string | null = null
40+
let userId: string | null = verification.userId || null
4141

42-
// Try to get workflowId from query params or request body
4342
const { searchParams } = new URL(request.url)
4443
workflowId = searchParams.get('workflowId')
44+
if (!userId) {
45+
userId = searchParams.get('userId')
46+
}
4547

46-
if (!workflowId && request.method === 'POST') {
48+
if (!workflowId && !userId && request.method === 'POST') {
4749
try {
4850
// Clone the request to avoid consuming the original body
4951
const clonedRequest = request.clone()
5052
const bodyText = await clonedRequest.text()
5153
if (bodyText) {
5254
const body = JSON.parse(bodyText)
5355
workflowId = body.workflowId || body._context?.workflowId
56+
userId = userId || body.userId || body._context?.userId
5457
}
5558
} catch {
5659
// Ignore JSON parse errors
5760
}
5861
}
5962

60-
if (!workflowId && options.requireWorkflowId !== false) {
63+
if (userId) {
6164
return {
62-
success: false,
63-
error: 'workflowId required for internal JWT calls',
65+
success: true,
66+
userId,
67+
authType: 'internal_jwt',
6468
}
6569
}
6670

6771
if (workflowId) {
68-
// Get workflow owner as user context
6972
const [workflowData] = await db
7073
.select({ userId: workflow.userId })
7174
.from(workflow)
@@ -85,7 +88,14 @@ export async function checkHybridAuth(
8588
authType: 'internal_jwt',
8689
}
8790
}
88-
// Internal call without workflow context - still valid for some routes
91+
92+
if (options.requireWorkflowId !== false) {
93+
return {
94+
success: false,
95+
error: 'workflowId or userId required for internal JWT calls',
96+
}
97+
}
98+
8999
return {
90100
success: true,
91101
authType: 'internal_jwt',

apps/sim/lib/auth/internal.ts

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import { createLogger } from '@/lib/logs/console/logger'
55

66
const logger = createLogger('CronAuth')
77

8-
// Create a secret key for JWT signing
98
const getJwtSecret = () => {
109
const secret = new TextEncoder().encode(env.INTERNAL_API_SECRET)
1110
return secret
@@ -14,11 +13,17 @@ const getJwtSecret = () => {
1413
/**
1514
* Generate an internal JWT token for server-side API calls
1615
* Token expires in 5 minutes to keep it short-lived
16+
* @param userId Optional user ID to embed in token payload
1717
*/
18-
export async function generateInternalToken(): Promise<string> {
18+
export async function generateInternalToken(userId?: string): Promise<string> {
1919
const secret = getJwtSecret()
2020

21-
const token = await new SignJWT({ type: 'internal' })
21+
const payload: { type: string; userId?: string } = { type: 'internal' }
22+
if (userId) {
23+
payload.userId = userId
24+
}
25+
26+
const token = await new SignJWT(payload)
2227
.setProtectedHeader({ alg: 'HS256' })
2328
.setIssuedAt()
2429
.setExpirationTime('5m')
@@ -31,9 +36,11 @@ export async function generateInternalToken(): Promise<string> {
3136

3237
/**
3338
* Verify an internal JWT token
34-
* Returns true if valid, false otherwise
39+
* Returns verification result with userId if present in token
3540
*/
36-
export async function verifyInternalToken(token: string): Promise<boolean> {
41+
export async function verifyInternalToken(
42+
token: string
43+
): Promise<{ valid: boolean; userId?: string }> {
3744
try {
3845
const secret = getJwtSecret()
3946

@@ -43,10 +50,17 @@ export async function verifyInternalToken(token: string): Promise<boolean> {
4350
})
4451

4552
// Check that it's an internal token
46-
return payload.type === 'internal'
53+
if (payload.type === 'internal') {
54+
return {
55+
valid: true,
56+
userId: typeof payload.userId === 'string' ? payload.userId : undefined,
57+
}
58+
}
59+
60+
return { valid: false }
4761
} catch (error) {
4862
// Token verification failed
49-
return false
63+
return { valid: false }
5064
}
5165
}
5266

apps/sim/lib/knowledge/documents/document-processor.ts

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ export async function processDocument(
5656
mimeType: string,
5757
chunkSize = 1000,
5858
chunkOverlap = 200,
59-
minChunkSize = 1
59+
minChunkSize = 1,
60+
userId?: string,
61+
workspaceId?: string | null
6062
): Promise<{
6163
chunks: Chunk[]
6264
metadata: {
@@ -73,7 +75,7 @@ export async function processDocument(
7375
logger.info(`Processing document: ${filename}`)
7476

7577
try {
76-
const parseResult = await parseDocument(fileUrl, filename, mimeType)
78+
const parseResult = await parseDocument(fileUrl, filename, mimeType, userId, workspaceId)
7779
const { content, processingMethod } = parseResult
7880
const cloudUrl = 'cloudUrl' in parseResult ? parseResult.cloudUrl : undefined
7981

@@ -131,7 +133,9 @@ export async function processDocument(
131133
async function parseDocument(
132134
fileUrl: string,
133135
filename: string,
134-
mimeType: string
136+
mimeType: string,
137+
userId?: string,
138+
workspaceId?: string | null
135139
): Promise<{
136140
content: string
137141
processingMethod: 'file-parser' | 'mistral-ocr'
@@ -146,20 +150,26 @@ async function parseDocument(
146150
if (isPDF && (hasAzureMistralOCR || hasMistralOCR)) {
147151
if (hasAzureMistralOCR) {
148152
logger.info(`Using Azure Mistral OCR: ${filename}`)
149-
return parseWithAzureMistralOCR(fileUrl, filename, mimeType)
153+
return parseWithAzureMistralOCR(fileUrl, filename, mimeType, userId, workspaceId)
150154
}
151155

152156
if (hasMistralOCR) {
153157
logger.info(`Using Mistral OCR: ${filename}`)
154-
return parseWithMistralOCR(fileUrl, filename, mimeType)
158+
return parseWithMistralOCR(fileUrl, filename, mimeType, userId, workspaceId)
155159
}
156160
}
157161

158162
logger.info(`Using file parser: ${filename}`)
159163
return parseWithFileParser(fileUrl, filename, mimeType)
160164
}
161165

162-
async function handleFileForOCR(fileUrl: string, filename: string, mimeType: string) {
166+
async function handleFileForOCR(
167+
fileUrl: string,
168+
filename: string,
169+
mimeType: string,
170+
userId?: string,
171+
workspaceId?: string | null
172+
) {
163173
const isExternalHttps = fileUrl.startsWith('https://') && !fileUrl.includes('/api/files/serve/')
164174

165175
if (isExternalHttps) {
@@ -175,6 +185,8 @@ async function handleFileForOCR(fileUrl: string, filename: string, mimeType: str
175185
originalName: filename,
176186
uploadedAt: new Date().toISOString(),
177187
purpose: 'knowledge-base',
188+
...(userId && { userId }),
189+
...(workspaceId && { workspaceId }),
178190
}
179191

180192
const cloudResult = await StorageService.uploadFile({
@@ -288,7 +300,13 @@ async function makeOCRRequest(
288300
}
289301
}
290302

291-
async function parseWithAzureMistralOCR(fileUrl: string, filename: string, mimeType: string) {
303+
async function parseWithAzureMistralOCR(
304+
fileUrl: string,
305+
filename: string,
306+
mimeType: string,
307+
userId?: string,
308+
workspaceId?: string | null
309+
) {
292310
validateOCRConfig(
293311
env.OCR_AZURE_API_KEY,
294312
env.OCR_AZURE_ENDPOINT,
@@ -336,12 +354,18 @@ async function parseWithAzureMistralOCR(fileUrl: string, filename: string, mimeT
336354
})
337355

338356
return env.MISTRAL_API_KEY
339-
? parseWithMistralOCR(fileUrl, filename, mimeType)
357+
? parseWithMistralOCR(fileUrl, filename, mimeType, userId, workspaceId)
340358
: parseWithFileParser(fileUrl, filename, mimeType)
341359
}
342360
}
343361

344-
async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType: string) {
362+
async function parseWithMistralOCR(
363+
fileUrl: string,
364+
filename: string,
365+
mimeType: string,
366+
userId?: string,
367+
workspaceId?: string | null
368+
) {
345369
if (!env.MISTRAL_API_KEY) {
346370
throw new Error('Mistral API key required')
347371
}
@@ -350,7 +374,13 @@ async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType:
350374
throw new Error('Mistral parser tool not configured')
351375
}
352376

353-
const { httpsUrl, cloudUrl } = await handleFileForOCR(fileUrl, filename, mimeType)
377+
const { httpsUrl, cloudUrl } = await handleFileForOCR(
378+
fileUrl,
379+
filename,
380+
mimeType,
381+
userId,
382+
workspaceId
383+
)
354384
const params = { filePath: httpsUrl, apiKey: env.MISTRAL_API_KEY, resultType: 'text' as const }
355385

356386
try {
@@ -361,7 +391,9 @@ async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType:
361391
? mistralParserTool.request!.url(params)
362392
: mistralParserTool.request!.url
363393

364-
if (url.startsWith('/')) {
394+
const isInternalRoute = url.startsWith('/')
395+
396+
if (isInternalRoute) {
365397
const { getBaseUrl } = await import('@/lib/urls/utils')
366398
url = `${getBaseUrl()}${url}`
367399
}
@@ -371,9 +403,9 @@ async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType:
371403
? mistralParserTool.request!.headers(params)
372404
: mistralParserTool.request!.headers
373405

374-
if (url.includes('/api/tools/mistral/parse')) {
406+
if (isInternalRoute) {
375407
const { generateInternalToken } = await import('@/lib/auth/internal')
376-
const internalToken = await generateInternalToken()
408+
const internalToken = await generateInternalToken(userId)
377409
headers = {
378410
...headers,
379411
Authorization: `Bearer ${internalToken}`,

0 commit comments

Comments
 (0)