diff --git a/client/next.config.js b/client/next.config.js index 1ad3638..47c9e05 100644 --- a/client/next.config.js +++ b/client/next.config.js @@ -20,7 +20,7 @@ const nextConfig = { "sharp$": false, "onnxruntime-node$": false, } - + config.resolve.alias.canvas = false config.resolve.alias.encoding = false return config; diff --git a/client/package-lock.json b/client/package-lock.json index 38cc627..dbaebb2 100644 --- a/client/package-lock.json +++ b/client/package-lock.json @@ -12,12 +12,15 @@ "@xenova/transformers": "^2.6.2", "filepond": "^4.30.4", "framer-motion": "^10.16.4", + "fs": "^0.0.1-security", "next": "latest", + "pdf2json": "^3.0.4", "react": "latest", "react-dom": "latest", "react-drag-drop-files": "^2.3.10", "react-filepond": "^7.1.2", - "react-pdf": "^7.5.0" + "react-pdf": "^7.5.0", + "uuid": "^9.0.1" }, "devDependencies": { "@types/long": "^5.0.0", @@ -5153,6 +5156,11 @@ } } }, + "node_modules/fs": { + "version": "0.0.1-security", + "resolved": "https://registry.npmjs.org/fs/-/fs-0.0.1-security.tgz", + "integrity": "sha512-3XY9e1pP0CVEUCdj5BmfIZxRBTSDycnbqhIOGec9QYtmVH2fbLpj86CFWkrNOkt/Fvty4KZG5lTglL9j/gJ87w==" + }, "node_modules/fs-constants": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", @@ -7034,6 +7042,32 @@ "node": ">=8" } }, + "node_modules/pdf2json": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/pdf2json/-/pdf2json-3.0.4.tgz", + "integrity": "sha512-NKmSg78W5V/T3Qvp+TPkYeARdP/XzxBTlhRGdDMrOI1beyI72JxW5u4yy5825ge3opzu4HF0xDgg+HZbYvbr4g==", + "bundleDependencies": [ + "@xmldom/xmldom" + ], + "dependencies": { + "@xmldom/xmldom": "^0.8.6" + }, + "bin": { + "pdf2json": "bin/pdf2json.js" + }, + "engines": { + "node": ">=18.12.1", + "npm": ">=8.19.2" + } + }, + "node_modules/pdf2json/node_modules/@xmldom/xmldom": { + "version": "0.8.7", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/pdfjs-dist": { "version": "3.11.174", "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-3.11.174.tgz", @@ -8864,6 +8898,18 @@ "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/watchpack": { "version": "2.4.0", "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.0.tgz", diff --git a/client/package.json b/client/package.json index 3bac484..bda1154 100644 --- a/client/package.json +++ b/client/package.json @@ -13,12 +13,15 @@ "@xenova/transformers": "^2.6.2", "filepond": "^4.30.4", "framer-motion": "^10.16.4", + "fs": "^0.0.1-security", "next": "latest", + "pdf2json": "^3.0.4", "react": "latest", "react-dom": "latest", "react-drag-drop-files": "^2.3.10", "react-filepond": "^7.1.2", - "react-pdf": "^7.5.0" + "react-pdf": "^7.5.0", + "uuid": "^9.0.1" }, "devDependencies": { "@types/long": "^5.0.0", diff --git a/client/public/sample3.pdf b/client/public/sample3.pdf new file mode 100644 index 0000000..eec5967 Binary files /dev/null and b/client/public/sample3.pdf differ diff --git a/client/src/app/api/upload/route.ts b/client/src/app/api/upload/route.ts new file mode 100644 index 0000000..11f92c8 --- /dev/null +++ b/client/src/app/api/upload/route.ts @@ -0,0 +1,60 @@ +import { NextRequest, NextResponse } from 'next/server'; // To handle the request and response +import { promises as fs } from 'fs'; // To save the file temporarily +import { v4 as uuidv4 } from 'uuid'; // To generate a unique filename +import PDFParser from 'pdf2json'; // To parse the pdf + +export async function POST(req: NextRequest) { + const formData: FormData = await req.formData(); + const uploadedFiles = formData.getAll('filepond'); + let fileName = ''; + let parsedText = ''; + + if (uploadedFiles && uploadedFiles.length > 0) { + const uploadedFile = uploadedFiles[1]; + console.log('Uploaded file:', uploadedFile); + + // Check if uploadedFile is of type File + if (uploadedFile instanceof File) { + // Generate a unique filename + fileName = uuidv4(); + + // Convert the uploaded file into a temporary file + const tempFilePath = `/tmp/${fileName}.pdf`; + + // Convert ArrayBuffer to Buffer + const fileBuffer = Buffer.from(await uploadedFile.arrayBuffer()); + + // Save the buffer as a file + await fs.writeFile(tempFilePath, fileBuffer); + + // Parse the pdf using pdf2json. See pdf2json docs for more info. + + // The reason I am bypassing type checks is because + // the default type definitions for pdf2json in the npm install + // do not allow for any constructor arguments. + // You can either modify the type definitions or bypass the type checks. + // I chose to bypass the type checks. + const pdfParser = new (PDFParser as any)(null, 1); + + // See pdf2json docs for more info on how the below works. + pdfParser.on('pdfParser_dataError', (errData: any) => + console.log(errData.parserError) + ); + + pdfParser.on('pdfParser_dataReady', () => { + console.log((pdfParser as any).getRawTextContent()); + parsedText = (pdfParser as any).getRawTextContent(); + }); + + pdfParser.loadPDF(tempFilePath); + } else { + console.log('Uploaded file is not in the expected format.'); + } + } else { + console.log('No files found.'); + } + + const response = new NextResponse(parsedText); + response.headers.set('FileName', fileName); + return response; +} \ No newline at end of file diff --git a/client/src/app/page.tsx b/client/src/app/page.tsx index 323f382..0112135 100644 --- a/client/src/app/page.tsx +++ b/client/src/app/page.tsx @@ -13,7 +13,7 @@ import { CardBody, } from "@nextui-org/react"; -import FileUploader from "@components/DropFile"; +import DropFile from "@components/DropFile"; import FileViewer from "@components/FileViewer"; @@ -86,7 +86,7 @@ const Home = () => {

- + diff --git a/client/src/components/DropFile.tsx b/client/src/components/DropFile.tsx index 2511356..92b86ff 100644 --- a/client/src/components/DropFile.tsx +++ b/client/src/components/DropFile.tsx @@ -1,9 +1,11 @@ -import React, { useState } from "react"; -import { FileUploader } from "react-drag-drop-files"; +"use client"; -const fileTypes = ["PDF"]; +import { FilePond } from "react-filepond"; +import "filepond/dist/filepond.min.css"; +import { useState } from "react"; -const DropFile = () => { + +export default const DropFile = () => { return (
@@ -15,7 +17,6 @@ const DropFile = () => { className="max-w-max" />
- ); -}; -export default DropFile; + ); +} diff --git a/client/src/components/FileViewer.tsx b/client/src/components/FileViewer.tsx index e373708..41350df 100644 --- a/client/src/components/FileViewer.tsx +++ b/client/src/components/FileViewer.tsx @@ -1,11 +1,11 @@ "use client"; -import React, { useCallback, useState } from "react"; +import React, { useCallback, useEffect, useState } from "react"; import { Button } from "@nextui-org/react"; import { pdfjs, Document, Page } from "react-pdf"; -// import "react-pdf/dist/Page/AnnotationLayer.css"; +import "react-pdf/dist/Page/AnnotationLayer.css"; import "react-pdf/dist/Page/TextLayer.css"; pdfjs.GlobalWorkerOptions.workerSrc = new URL( @@ -21,27 +21,16 @@ function highlightPattern(text: string, pattern: string) { } export default function FileViewer() { - const [searchText, setSearchText] = useState(""); + const [searchText, setSearchText] = useState(""); const [numPages, setNumPages] = useState(null); - const [pageNumber, setPageNumber] = useState(1); - // Display the first page + const [currentPageText, setCurrentPageText] = useState(""); + const [context, setContext] = useState(""); + + // Get number of pages function onDocumentLoadSuccess({ numPages }: any) { setNumPages(numPages); - setPageNumber(1); - } - - // Navigation - function changePage(offset: any) { - setPageNumber((prevPageNumber) => prevPageNumber + offset); - } - function previousPage() { - changePage(-1); - } - - function nextPage() { - changePage(1); } // Highlighting given text @@ -50,38 +39,31 @@ export default function FileViewer() { [searchText], ); + // Get text from single page + const getTextPage = useCallback( + (e: any) => + e.getTextContent().then((textContent: any) => { + let pageText: string = textContent.items + .map((s: any) => s.str) + .join(""); + setCurrentPageText(pageText); + console.log("Current page: " + pageText); + }), + [], + ); + + // Update context (still missing the last page) + useEffect(() => { + setContext(context + " " + currentPageText); + console.log("Context: " + context); + }, [currentPageText]); + // Uodate search text function onChange(event: any) { setSearchText(event.target.value); } return ( -
-
-

- Page {pageNumber || (numPages ? 1 : "--")} of {numPages || "--"} -

-
- - -
-
- - - -
+ + {Array.from(new Array(numPages), (el, index) => ( + + ))} +
); }