@@ -56,7 +56,9 @@ export async function processDocument(
5656  mimeType : string , 
5757  chunkSize  =  1000 , 
5858  chunkOverlap  =  200 , 
59-   minChunkSize  =  1 
59+   minChunkSize  =  1 , 
60+   userId ?: string , 
61+   workspaceId ?: string  |  null 
6062) : Promise < { 
6163  chunks : Chunk [ ] 
6264  metadata : { 
@@ -73,7 +75,7 @@ export async function processDocument(
7375  logger . info ( `Processing document: ${ filename }  ) 
7476
7577  try  { 
76-     const  parseResult  =  await  parseDocument ( fileUrl ,  filename ,  mimeType ) 
78+     const  parseResult  =  await  parseDocument ( fileUrl ,  filename ,  mimeType ,   userId ,   workspaceId ) 
7779    const  {  content,  processingMethod }  =  parseResult 
7880    const  cloudUrl  =  'cloudUrl'  in  parseResult  ? parseResult . cloudUrl  : undefined 
7981
@@ -131,7 +133,9 @@ export async function processDocument(
131133async  function  parseDocument ( 
132134  fileUrl : string , 
133135  filename : string , 
134-   mimeType : string 
136+   mimeType : string , 
137+   userId ?: string , 
138+   workspaceId ?: string  |  null 
135139) : Promise < { 
136140  content : string 
137141  processingMethod : 'file-parser'  |  'mistral-ocr' 
@@ -146,20 +150,26 @@ async function parseDocument(
146150  if  ( isPDF  &&  ( hasAzureMistralOCR  ||  hasMistralOCR ) )  { 
147151    if  ( hasAzureMistralOCR )  { 
148152      logger . info ( `Using Azure Mistral OCR: ${ filename }  ) 
149-       return  parseWithAzureMistralOCR ( fileUrl ,  filename ,  mimeType ) 
153+       return  parseWithAzureMistralOCR ( fileUrl ,  filename ,  mimeType ,   userId ,   workspaceId ) 
150154    } 
151155
152156    if  ( hasMistralOCR )  { 
153157      logger . info ( `Using Mistral OCR: ${ filename }  ) 
154-       return  parseWithMistralOCR ( fileUrl ,  filename ,  mimeType ) 
158+       return  parseWithMistralOCR ( fileUrl ,  filename ,  mimeType ,   userId ,   workspaceId ) 
155159    } 
156160  } 
157161
158162  logger . info ( `Using file parser: ${ filename }  ) 
159163  return  parseWithFileParser ( fileUrl ,  filename ,  mimeType ) 
160164} 
161165
162- async  function  handleFileForOCR ( fileUrl : string ,  filename : string ,  mimeType : string )  { 
166+ async  function  handleFileForOCR ( 
167+   fileUrl : string , 
168+   filename : string , 
169+   mimeType : string , 
170+   userId ?: string , 
171+   workspaceId ?: string  |  null 
172+ )  { 
163173  const  isExternalHttps  =  fileUrl . startsWith ( 'https://' )  &&  ! fileUrl . includes ( '/api/files/serve/' ) 
164174
165175  if  ( isExternalHttps )  { 
@@ -175,6 +185,8 @@ async function handleFileForOCR(fileUrl: string, filename: string, mimeType: str
175185      originalName : filename , 
176186      uploadedAt : new  Date ( ) . toISOString ( ) , 
177187      purpose : 'knowledge-base' , 
188+       ...( userId  &&  {  userId } ) , 
189+       ...( workspaceId  &&  {  workspaceId } ) , 
178190    } 
179191
180192    const  cloudResult  =  await  StorageService . uploadFile ( { 
@@ -288,7 +300,13 @@ async function makeOCRRequest(
288300  } 
289301} 
290302
291- async  function  parseWithAzureMistralOCR ( fileUrl : string ,  filename : string ,  mimeType : string )  { 
303+ async  function  parseWithAzureMistralOCR ( 
304+   fileUrl : string , 
305+   filename : string , 
306+   mimeType : string , 
307+   userId ?: string , 
308+   workspaceId ?: string  |  null 
309+ )  { 
292310  validateOCRConfig ( 
293311    env . OCR_AZURE_API_KEY , 
294312    env . OCR_AZURE_ENDPOINT , 
@@ -336,12 +354,18 @@ async function parseWithAzureMistralOCR(fileUrl: string, filename: string, mimeT
336354    } ) 
337355
338356    return  env . MISTRAL_API_KEY 
339-       ? parseWithMistralOCR ( fileUrl ,  filename ,  mimeType ) 
357+       ? parseWithMistralOCR ( fileUrl ,  filename ,  mimeType ,   userId ,   workspaceId ) 
340358      : parseWithFileParser ( fileUrl ,  filename ,  mimeType ) 
341359  } 
342360} 
343361
344- async  function  parseWithMistralOCR ( fileUrl : string ,  filename : string ,  mimeType : string )  { 
362+ async  function  parseWithMistralOCR ( 
363+   fileUrl : string , 
364+   filename : string , 
365+   mimeType : string , 
366+   userId ?: string , 
367+   workspaceId ?: string  |  null 
368+ )  { 
345369  if  ( ! env . MISTRAL_API_KEY )  { 
346370    throw  new  Error ( 'Mistral API key required' ) 
347371  } 
@@ -350,7 +374,13 @@ async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType:
350374    throw  new  Error ( 'Mistral parser tool not configured' ) 
351375  } 
352376
353-   const  {  httpsUrl,  cloudUrl }  =  await  handleFileForOCR ( fileUrl ,  filename ,  mimeType ) 
377+   const  {  httpsUrl,  cloudUrl }  =  await  handleFileForOCR ( 
378+     fileUrl , 
379+     filename , 
380+     mimeType , 
381+     userId , 
382+     workspaceId 
383+   ) 
354384  const  params  =  {  filePath : httpsUrl ,  apiKey : env . MISTRAL_API_KEY ,  resultType : 'text'  as  const  } 
355385
356386  try  { 
@@ -361,7 +391,9 @@ async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType:
361391            ? mistralParserTool . request ! . url ( params ) 
362392            : mistralParserTool . request ! . url 
363393
364-         if  ( url . startsWith ( '/' ) )  { 
394+         const  isInternalRoute  =  url . startsWith ( '/' ) 
395+ 
396+         if  ( isInternalRoute )  { 
365397          const  {  getBaseUrl }  =  await  import ( '@/lib/urls/utils' ) 
366398          url  =  `${ getBaseUrl ( ) } ${ url }  
367399        } 
@@ -371,9 +403,9 @@ async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType:
371403            ? mistralParserTool . request ! . headers ( params ) 
372404            : mistralParserTool . request ! . headers 
373405
374-         if  ( url . includes ( '/api/tools/mistral/parse' ) )  { 
406+         if  ( isInternalRoute )  { 
375407          const  {  generateInternalToken }  =  await  import ( '@/lib/auth/internal' ) 
376-           const  internalToken  =  await  generateInternalToken ( ) 
408+           const  internalToken  =  await  generateInternalToken ( userId ) 
377409          headers  =  { 
378410            ...headers , 
379411            Authorization : `Bearer ${ internalToken }  , 
0 commit comments