diff --git a/.github/workflows/backend-review.yml b/.github/workflows/backend-review.yml index b7bccecae84e..a054d348eec3 100644 --- a/.github/workflows/backend-review.yml +++ b/.github/workflows/backend-review.yml @@ -1,5 +1,6 @@ name: Backend Unit Tests on: + workflow_dispatch: pull_request: branches: - main @@ -67,4 +68,4 @@ jobs: run: cd packages/data-provider && npm run test:ci - name: Run librechat-mcp unit tests - run: cd packages/mcp && npm run test:ci \ No newline at end of file + run: cd packages/mcp && npm run test:ci diff --git a/.gitignore b/.gitignore index f49594afdfc6..9bc0601b56eb 100644 --- a/.gitignore +++ b/.gitignore @@ -112,6 +112,9 @@ auth.json # User uploads uploads/ +# Ollama +ollama/ + # owner release/ diff --git a/api/server/services/Files/TikaOCR/crud.js b/api/server/services/Files/TikaOCR/crud.js new file mode 100644 index 000000000000..f6f99800afee --- /dev/null +++ b/api/server/services/Files/TikaOCR/crud.js @@ -0,0 +1,109 @@ +// ~/server/services/Files/TikaOCR/crud.js +const fs = require('fs'); +const path = require('path'); +const FormData = require('form-data'); +const { FileSources, envVarRegex, extractEnvVariable } = require('librechat-data-provider'); +const { loadAuthValues } = require('~/server/services/Tools/credentials'); +const { logger, createAxiosInstance } = require('~/config'); +const { logAxiosError } = require('~/utils/axios'); + +const axios = createAxiosInstance(); + +function extractVariableName(str) { + const match = str.match(envVarRegex); + return match ? match[1] : null; +} + +/** + * Uploads a document to Tika. DOES NOT FILE STREAM. 
+ *
+ * The whole file is read into memory with one synchronous call and sent as the raw
+ * request body; the upload is not streamed.
+ *
+ * @param {Object} params Upload parameters
+ * @param {string} params.filePath The path to the file on disk
+ * @param {string} [params.baseURL=http://tika:9998] Tika API base URL if using docker.
+ *   Trailing slashes are ignored and an empty value falls back to the default.
+ * @param {string} [params.contentType=application/pdf] MIME type sent as the `Content-Type` header
+ * @returns {Promise<string>} The response body returned by Tika (plain text is requested)
+ * @throws {Error} Rethrows the request error after logging its message
+ */
+async function uploadDocumentToTika({
+  filePath,
+  baseURL = 'http://tika:9998',
+  contentType = 'application/pdf',
+}) {
+  // The complete file is buffered here; nothing is streamed to Tika.
+  const fileData = fs.readFileSync(filePath);
+
+  // An empty base URL falls back to the default, and trailing slashes are dropped so the
+  // request never targets `//tika`.
+  const tikaBaseURL = (baseURL || 'http://tika:9998').replace(/\/+$/, '');
+
+  try {
+    const res = await axios.put(`${tikaBaseURL}/tika`, fileData, {
+      headers: {
+        'Content-Type': contentType || 'application/pdf',
+        Accept: 'text/plain',
+      },
+      // Do not cap the size of the uploaded body or of the response.
+      maxBodyLength: Infinity,
+      maxContentLength: Infinity,
+    });
+    return res.data;
+  } catch (error) {
+    logger.error('Error uploading document to Tika:', error.message);
+    throw error;
+  }
+}
+
+/**
+ * Uploads a file to the Tika OCR API and processes the OCR result.
+ *
+ * The Tika base URL comes from the `ocr.baseURL` config value. When that value is empty or an
+ * environment variable reference, the URL is looked up through `loadAuthValues` instead
+ * (under `OCR_BASEURL` when the value is empty); if nothing is found, the uploader's default
+ * URL is used.
+ *
+ * @param {Object} params - The params object.
+ * @param {ServerRequest} params.req - The request object from Express. It should have a `user` property with an `id`
+ *                       representing the user
+ * @param {Express.Multer.File} params.file - The file object, which is part of the request. Its `path` and
+ *                       `originalname` are used, and its `mimetype`, when present, is sent to Tika as the content type.
+ * @param {string} params.file_id - The file ID, not used here but passed for consistency.
+ * @param {string} [params.entity_id] - The entity ID, not used here but passed for consistency.
+ * @returns {Promise<{ filename: string, bytes: number, filepath: string, text: string, images: Array }>} - The
+ *                       extracted `text`, an empty `images` list (not currently used), the original `filename`,
+ *                       an estimated `bytes` size, and `filepath` set to the Tika OCR file source.
+ * @throws {Error} When the base URL lookup or the Tika request fails.
+ */
+const uploadTikaOCR = async ({ req, file, file_id, entity_id }) => {
+  try {
+    /** @type {TCustomConfig['ocr']} */
+    const ocrConfig = req.app.locals?.ocr;
+    // A missing `ocr` section is treated like an empty base URL.
+    const baseURLConfig = ocrConfig?.baseURL || '';
+    const isBaseURLEnvVar = envVarRegex.test(baseURLConfig);
+    const isBaseURLEmpty = !baseURLConfig.trim();
+
+    let baseURL = baseURLConfig;
+    if (isBaseURLEnvVar || isBaseURLEmpty) {
+      const baseURLVarName = isBaseURLEnvVar ? extractVariableName(baseURLConfig) : 'OCR_BASEURL';
+      const authValues = await loadAuthValues({
+        userId: req.user.id,
+        authFields: [baseURLVarName],
+        optional: new Set([baseURLVarName]),
+      });
+      // May be undefined because the field is optional; the uploader then uses its default URL.
+      baseURL = authValues[baseURLVarName];
+    }
+
+    const extractedText = await uploadDocumentToTika({
+      filePath: file.path,
+      baseURL,
+      contentType: file.mimetype,
+    });
+
+    return {
+      filename: file.originalname,
+      // Size estimate: four bytes per UTF-16 code unit is an upper bound on the UTF-8 length.
+      bytes: extractedText.length * 4,
+      filepath: FileSources.tika_ocr,
+      text: extractedText,
+      images: [], // Not used in this implementation
+    };
+  } catch (error) {
+    const message = 'Error uploading document to Tika OCR API';
+    throw new Error(logAxiosError({ error, message }));
+  }
+};
+
+module.exports = {
+  uploadTikaOCR,
+};
diff --git a/api/server/services/Files/TikaOCR/crud.spec.js b/api/server/services/Files/TikaOCR/crud.spec.js
new file mode 100644
index 000000000000..9cc7445a98c3
--- /dev/null
+++ b/api/server/services/Files/TikaOCR/crud.spec.js
@@ -0,0 +1,174 @@
+const fs = require('fs');
+const mockAxios = {
+  put: jest.fn().mockResolvedValue({ data: 'Extracted text from Tika' }),
+  interceptors: {
+    response: {
+      use: jest.fn(),
+    },
+  },
+};
+jest.mock('axios', () => mockAxios);
+jest.mock('fs');
+jest.mock('~/config', () => ({
+  logger: {
+    error: jest.fn(),
+  },
+  createAxiosInstance: () => mockAxios,
+}));
+jest.mock('~/server/services/Tools/credentials', () => ({
+  loadAuthValues: jest.fn(),
+}));
+
+const { uploadTikaOCR } = require('./crud');
+
+describe('TikaOCR Service', () => {
+  afterEach(() => {
jest.clearAllMocks(); + }); + + describe('uploadDocumentToTika', () => { + it('should upload a document to Tika and return extracted text', async () => { + const mockFilePath = '/path/to/test.pdf'; + const mockFileData = Buffer.from('mock file data'); + fs.readFileSync.mockReturnValue(mockFileData); + + const result = await uploadTikaOCR({ + req: { + user: { id: 'user123' }, + app: { + locals: { + ocr: { + baseURL: 'http://tika:9998', + }, + }, + }, + }, + file: { + path: mockFilePath, + originalname: 'test.pdf', + }, + file_id: 'file123', + entity_id: 'entity123', + }); + + expect(fs.readFileSync).toHaveBeenCalledWith(mockFilePath); + expect(mockAxios.put).toHaveBeenCalledWith( + 'http://tika:9998/tika', + mockFileData, + expect.objectContaining({ + headers: { + 'Content-Type': 'application/pdf', + Accept: 'text/plain', + }, + maxBodyLength: Infinity, + maxContentLength: Infinity, + }), + ); + expect(result).toEqual({ + filename: 'test.pdf', + bytes: 'Extracted text from Tika'.length * 4, + filepath: 'tika_ocr', + text: 'Extracted text from Tika', + images: [], + }); + }); + + it('should handle errors during document upload', async () => { + const errorMessage = 'Tika API error'; + mockAxios.put.mockRejectedValueOnce(new Error(errorMessage)); + + await expect( + uploadTikaOCR({ + req: { + user: { id: 'user123' }, + app: { + locals: { + ocr: { + baseURL: 'http://tika:9998', + }, + }, + }, + }, + file: { + path: '/path/to/test.pdf', + originalname: 'test.pdf', + }, + file_id: 'file123', + entity_id: 'entity123', + }), + ).rejects.toThrow('Error uploading document to Tika OCR API'); + + const { logger } = require('~/config'); + expect(logger.error).toHaveBeenCalledWith( + expect.stringContaining('Error uploading document to Tika:'), + expect.any(String), + ); + }); + + it('should resolve baseURL from environment variables when configured', async () => { + const { loadAuthValues } = require('~/server/services/Tools/credentials'); + loadAuthValues.mockResolvedValue({ + 
+        OCR_BASEURL: 'http://tika:9998',
+      });
+      // Set the file mock here so this test does not depend on an earlier test having run.
+      fs.readFileSync.mockReturnValue(Buffer.from('mock file data'));
+
+      const result = await uploadTikaOCR({
+        req: {
+          user: { id: 'user123' },
+          app: {
+            locals: {
+              ocr: {
+                baseURL: '${OCR_BASEURL}',
+              },
+            },
+          },
+        },
+        file: {
+          path: '/path/to/test.pdf',
+          originalname: 'test.pdf',
+        },
+        file_id: 'file123',
+        entity_id: 'entity123',
+      });
+
+      expect(loadAuthValues).toHaveBeenCalledWith({
+        userId: 'user123',
+        authFields: ['OCR_BASEURL'],
+        optional: expect.any(Set),
+      });
+      expect(mockAxios.put).toHaveBeenCalledWith(
+        'http://tika:9998/tika',
+        expect.any(Buffer),
+        expect.any(Object),
+      );
+      expect(result.text).toEqual('Extracted text from Tika');
+    });
+
+    it('should handle empty baseURL and use default', async () => {
+      const { loadAuthValues } = require('~/server/services/Tools/credentials');
+      // `jest.clearAllMocks()` clears recorded calls but keeps configured return values, so the
+      // mocks are set explicitly: no OCR_BASEURL credential is stored, which forces the
+      // uploader to fall back to its default URL.
+      loadAuthValues.mockResolvedValue({});
+      fs.readFileSync.mockReturnValue(Buffer.from('mock file data'));
+
+      const result = await uploadTikaOCR({
+        req: {
+          user: { id: 'user123' },
+          app: {
+            locals: {
+              ocr: {
+                baseURL: '',
+              },
+            },
+          },
+        },
+        file: {
+          path: '/path/to/test.pdf',
+          originalname: 'test.pdf',
+        },
+        file_id: 'file123',
+        entity_id: 'entity123',
+      });
+
+      // An empty baseURL is looked up under the fallback variable name.
+      expect(loadAuthValues).toHaveBeenCalledWith({
+        userId: 'user123',
+        authFields: ['OCR_BASEURL'],
+        optional: expect.any(Set),
+      });
+      expect(mockAxios.put).toHaveBeenCalledWith(
+        'http://tika:9998/tika',
+        expect.any(Buffer),
+        expect.any(Object),
+      );
+      expect(result.text).toEqual('Extracted text from Tika');
+    });
+  });
+});
\ No newline at end of file
diff --git a/api/server/services/Files/TikaOCR/index.js b/api/server/services/Files/TikaOCR/index.js
new file mode 100644
index 000000000000..a6223d1ee5d2
--- /dev/null
+++ b/api/server/services/Files/TikaOCR/index.js
@@ -0,0 +1,5 @@
+const crud = require('./crud');
+
+module.exports = {
+  ...crud,
+};
diff --git a/api/server/services/Files/process.js b/api/server/services/Files/process.js
index 94b1bc4dadc3..b3a98a49c723 100644
--- a/api/server/services/Files/process.js
+++ b/api/server/services/Files/process.js
@@ -522,7 +522,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => {
       throw new Error('OCR capability is not enabled for Agents');
     }
 
-    const { handleFileUpload: uploadMistralOCR } = getStrategyFunctions(
+    const { handleFileUpload: uploadOCR } =
getStrategyFunctions( req.app.locals?.ocr?.strategy ?? FileSources.mistral_ocr, ); const { file_id, temp_file_id } = metadata; @@ -534,7 +534,7 @@ const processAgentFileUpload = async ({ req, res, metadata }) => { images, filename, filepath: ocrFileURL, - } = await uploadMistralOCR({ req, file, file_id, entity_id: agent_id, basePath }); + } = await uploadOCR({ req, file, file_id, entity_id: agent_id, basePath }); const fileInfo = removeNullishValues({ text, diff --git a/api/server/services/Files/strategies.js b/api/server/services/Files/strategies.js index c6cfe77069ed..58953f0f7e5c 100644 --- a/api/server/services/Files/strategies.js +++ b/api/server/services/Files/strategies.js @@ -47,6 +47,7 @@ const { uploadOpenAIFile, deleteOpenAIFile, getOpenAIFileStream } = require('./O const { getCodeOutputDownloadStream, uploadCodeEnvFile } = require('./Code'); const { uploadVectors, deleteVectors } = require('./VectorDB'); const { uploadMistralOCR } = require('./MistralOCR'); +const { uploadTikaOCR } = require('./TikaOCR'); /** * Firebase Storage Strategy Functions @@ -202,6 +203,26 @@ const mistralOCRStrategy = () => ({ handleFileUpload: uploadMistralOCR, }); +const tikaOCRStrategy = () => ({ + /** @type {typeof saveFileFromURL | null} */ + saveURL: null, + /** @type {typeof getLocalFileURL | null} */ + getFileURL: null, + /** @type {typeof saveLocalBuffer | null} */ + saveBuffer: null, + /** @type {typeof processLocalAvatar | null} */ // I am not sure if this is correct + processAvatar: null, + /** @type {typeof uploadLocalImage | null} */ + handleImageUpload: null, + /** @type {typeof prepareImagesLocal | null} */ + prepareImagePayload: null, + /** @type {typeof deleteLocalFile | null} */ + deleteFile: null, + /** @type {typeof getLocalFileStream | null} */ + getDownloadStream: null, + handleFileUpload: uploadTikaOCR, +}); + // Strategy Selector const getStrategyFunctions = (fileSource) => { if (fileSource === FileSources.firebase) { @@ -222,6 +243,8 @@ const 
getStrategyFunctions = (fileSource) => { return codeOutputStrategy(); } else if (fileSource === FileSources.mistral_ocr) { return mistralOCRStrategy(); + } else if (fileSource === FileSources.tika_ocr) { + return tikaOCRStrategy(); } else { throw new Error('Invalid file source'); } diff --git a/docker-compose.override.yml.example b/docker-compose.override.yml.example index 8c8aba9ed089..7051b0725690 100644 --- a/docker-compose.override.yml.example +++ b/docker-compose.override.yml.example @@ -123,6 +123,20 @@ # volumes: # - ./ollama:/root/.ollama +# # ADD TIKA +# tika: +# image: apache/tika:latest-full +# container_name: tika +# ports: +# - "${TIKA_PORT-9998}:9998" +# configs: +# - source: tika_config_3 +# target: /tika-config.xml +# +# configs: +# tika_config_3: +# file: ./tika-config.xml + # # ADD LITELLM BASIC - NEED TO CONFIGURE litellm-config.yaml, ONLY NEED ENV TO ENABLE REDIS FOR CACHING OR LANGFUSE FOR MONITORING # litellm: # image: ghcr.io/berriai/litellm:main-latest diff --git a/librechat.example.yaml b/librechat.example.yaml index 38cb26eb4d27..746e2a731c4b 100644 --- a/librechat.example.yaml +++ b/librechat.example.yaml @@ -156,6 +156,14 @@ actions: # - "mcp-obsidian" # - /path/to/obsidian/vault +# Example Tika OCR configuration + +# ocr: +# apiKey: "none" +# baseURL: "http://tika:9998" +# strategy: "tika_ocr" + + # Definition of custom endpoints endpoints: # assistants: @@ -181,6 +189,22 @@ endpoints: # # (optional) Agent Capabilities available to all users. Omit the ones you wish to exclude. Defaults to list below. 
# capabilities: ["execute_code", "file_search", "actions", "tools"] custom: + # Ollama example + - name: "ollama" + apiKey: "ollama" + baseURL: "http://ollama:11434/v1/" + models: + default: + [ + "llama3.2:3b", + ] + fetch: true + titleConvo: true + titleModel: "current_model" + summarize: false + summaryModel: "current_model" + forcePrompt: false + modelDisplayLabel: "Ollama" # Groq Example - name: 'groq' apiKey: '${GROQ_API_KEY}' diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index 005efa44343a..2e7ede3d8ab7 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -586,6 +586,7 @@ export type TStartupConfig = { export enum OCRStrategy { MISTRAL_OCR = 'mistral_ocr', CUSTOM_OCR = 'custom_ocr', + TIKA_OCR = 'tika_ocr', } export enum SearchCategories { diff --git a/packages/data-provider/src/types/files.ts b/packages/data-provider/src/types/files.ts index 927002630fb8..52d39dafe632 100644 --- a/packages/data-provider/src/types/files.ts +++ b/packages/data-provider/src/types/files.ts @@ -10,6 +10,7 @@ export enum FileSources { vectordb = 'vectordb', execute_code = 'execute_code', mistral_ocr = 'mistral_ocr', + tika_ocr = 'tika_ocr', text = 'text', } diff --git a/tika-config.xml b/tika-config.xml new file mode 100644 index 000000000000..26f126c4c7e7 --- /dev/null +++ b/tika-config.xml @@ -0,0 +1,36 @@ + + + + + + application/pdf + + + + + application/pdf + + + + + + OCR_AND_TEXT_EXTRACTION + + + + application/xml + text/xml + + + + + + + debug + + true + true + + + \ No newline at end of file