Skip to content

Commit 164d7d2

Browse files
Hellozaqjxfzzzt and sebastianMindee
authored
⬆️ Bump pdfbox from 2.0.31 to 3.0.5 (#240)
* bump pdfbox
* fix breaking changes in `org.apache.pdfbox:pdfbox`
* Update src/main/java/com/mindee/input/InputSourceUtils.java
* Update src/main/java/com/mindee/input/InputSourceUtils.java

---------

Co-authored-by: zhouzhuotong <1160490625@qq.com>
Co-authored-by: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com>
1 parent 04cf4b9 commit 164d7d2

File tree

8 files changed

+28
-18
lines changed

8 files changed

+28
-18
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@
415415
<org.apache.maven.project.info.reports.version>3.0.0</org.apache.maven.project.info.reports.version>
416416
<org.apache.maven.site.version>3.7.1</org.apache.maven.site.version>
417417
<org.apache.maven.surfire.version>3.2.5</org.apache.maven.surfire.version>
418-
<org.apache.pdfbox.version>2.0.31</org.apache.pdfbox.version>
418+
<org.apache.pdfbox.version>3.0.5</org.apache.pdfbox.version>
419419
<org.junit.jupiter.version>5.8.2</org.junit.jupiter.version>
420420
<org.junit.pioneer.version>1.9.1</org.junit.pioneer.version>
421421
<org.junit.platform.version>1.8.2</org.junit.platform.version>

src/main/java/com/mindee/extraction/PDFExtractor.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import java.util.List;
1515
import java.util.stream.Collectors;
1616
import javax.imageio.ImageIO;
17+
import org.apache.pdfbox.Loader;
1718
import org.apache.pdfbox.pdmodel.PDDocument;
1819
import org.apache.pdfbox.pdmodel.PDPage;
1920
import org.apache.pdfbox.pdmodel.PDPageContentStream;
@@ -46,7 +47,7 @@ public PDFExtractor(String filePath) throws IOException {
4647
public PDFExtractor(LocalInputSource source) throws IOException {
4748
this.filename = source.getFilename();
4849
if (source.isPdf()) {
49-
this.sourcePdf = PDDocument.load(source.getFile());
50+
this.sourcePdf = Loader.loadPDF(source.getFile());
5051
} else {
5152
PDDocument document = new PDDocument();
5253
PDPage page = new PDPage();
@@ -104,7 +105,7 @@ public List<ExtractedPDF> extractSubDocuments(List<List<Integer>> pageIndexes)
104105
+ String.format("%3s", pageIndexElement.get(pageIndexElement.size() - 1) + 1)
105106
.replace(" ", "0") + "." + splitName[1];
106107
extractedPDFs.add(
107-
new ExtractedPDF(PDDocument.load(mergePdfPages(this.sourcePdf, pageIndexElement, false)),
108+
new ExtractedPDF(Loader.loadPDF(mergePdfPages(this.sourcePdf, pageIndexElement, false)),
108109
fieldFilename));
109110
}
110111
return extractedPDFs;

src/main/java/com/mindee/input/InputSourceUtils.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import java.io.IOException;
66
import java.net.URL;
77
import javax.imageio.ImageIO;
8+
import org.apache.pdfbox.Loader;
9+
import org.apache.pdfbox.io.RandomAccessReadBuffer;
810
import org.apache.pdfbox.pdmodel.PDDocument;
911
import org.apache.pdfbox.text.PDFTextStripper;
1012

@@ -70,7 +72,7 @@ public static String[] splitNameStrict(String filename) throws MindeeException {
7072
* Returns true if the file is a PDF.
7173
*/
7274
public static boolean isPdf(byte[] fileBytes) {
73-
try (PDDocument document = PDDocument.load(new ByteArrayInputStream(fileBytes))) {
75+
try (PDDocument document = Loader.loadPDF(new RandomAccessReadBuffer(new ByteArrayInputStream(fileBytes)))) {
7476
return true;
7577
} catch (IOException e) {
7678
return false;
@@ -95,7 +97,7 @@ public static void validateUrl(URL inputUrl) {
9597
*/
9698
public static boolean hasSourceText(byte[] fileBytes) {
9799
try {
98-
PDDocument document = PDDocument.load(new ByteArrayInputStream(fileBytes));
100+
PDDocument document = Loader.loadPDF(new RandomAccessReadBuffer(new ByteArrayInputStream(fileBytes)));
99101
PDFTextStripper stripper = new PDFTextStripper();
100102

101103
for (int i = 0; i < document.getNumberOfPages(); i++) {

src/main/java/com/mindee/pdf/PDFUtils.java

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import java.io.IOException;
99
import java.util.ArrayList;
1010
import java.util.List;
11+
import org.apache.pdfbox.Loader;
1112
import org.apache.pdfbox.cos.COSDictionary;
1213
import org.apache.pdfbox.cos.COSName;
1314
import org.apache.pdfbox.pdmodel.PDDocument;
@@ -16,6 +17,7 @@
1617
import org.apache.pdfbox.pdmodel.PDResources;
1718
import org.apache.pdfbox.pdmodel.common.PDRectangle;
1819
import org.apache.pdfbox.pdmodel.font.PDType1Font;
20+
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
1921
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
2022
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
2123
import org.apache.pdfbox.rendering.ImageType;
@@ -37,7 +39,7 @@ private PDFUtils() {
3739
* @param inputSource The PDF file.
3840
*/
3941
public static int getNumberOfPages(LocalInputSource inputSource) throws IOException {
40-
PDDocument document = PDDocument.load(inputSource.getFile());
42+
PDDocument document = Loader.loadPDF(inputSource.getFile());
4143
int pageCount = document.getNumberOfPages();
4244
document.close();
4345
return pageCount;
@@ -81,7 +83,7 @@ private static byte[] createPdfFromExistingPdf(
8183
* @param pageNumbers Lit of page numbers to merge together.
8284
*/
8385
public static byte[] mergePdfPages(File file, List<Integer> pageNumbers) throws IOException {
84-
PDDocument document = PDDocument.load(file);
86+
PDDocument document = Loader.loadPDF(file);
8587
return createPdfFromExistingPdf(document, pageNumbers, true);
8688
}
8789

@@ -100,7 +102,7 @@ public static byte[] mergePdfPages(
100102

101103

102104
public static boolean isPdfEmpty(File file) throws IOException {
103-
return checkIfPdfIsEmpty(PDDocument.load(file));
105+
return checkIfPdfIsEmpty(Loader.loadPDF(file));
104106
}
105107

106108
private static boolean checkIfPdfIsEmpty(PDDocument document) throws IOException {
@@ -143,7 +145,7 @@ public static List<PdfPageImage> pdfToImages(String filePath) throws IOException
143145
* @return List of all pages as images.
144146
*/
145147
public static List<PdfPageImage> pdfToImages(LocalInputSource source) throws IOException {
146-
PDDocument document = PDDocument.load(source.getFile());
148+
PDDocument document = Loader.loadPDF(source.getFile());
147149
PDFRenderer pdfRenderer = new PDFRenderer(document);
148150
List<PdfPageImage> pdfPageImages = new ArrayList<>();
149151
for (int i = 0; i < document.getNumberOfPages(); i++) {
@@ -182,7 +184,7 @@ public static PdfPageImage pdfPageToImage(
182184
int pageNumber
183185
) throws IOException {
184186
int index = pageNumber - 1;
185-
PDDocument document = PDDocument.load(source.getFile());
187+
PDDocument document = Loader.loadPDF(source.getFile());
186188
PDFRenderer pdfRenderer = new PDFRenderer(document);
187189
BufferedImage imageBuffer = pdfPageToImageBuffer(index, document, pdfRenderer);
188190
document.close();
@@ -242,7 +244,7 @@ protected void writeString(String text, List<TextPosition> textPositions) throws
242244
try {
243245
contentStream.showText(text);
244246
} catch (IllegalArgumentException | UnsupportedOperationException e) {
245-
contentStream.setFont(PDType1Font.HELVETICA, fontSize);
247+
contentStream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), fontSize);
246248
contentStream.showText(text);
247249
}
248250
contentStream.endText();

src/main/java/com/mindee/pdf/PdfBoxApi.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import java.util.stream.Collectors;
1313
import java.util.stream.IntStream;
1414
import java.util.stream.Stream;
15+
import org.apache.pdfbox.Loader;
1516
import org.apache.pdfbox.pdmodel.PDDocument;
1617

1718
/**
@@ -26,7 +27,7 @@ public SplitPdf split(SplitQuery splitQuery) throws IOException {
2627
throw new MindeeException("This document cannot be open and cannot be split.");
2728
}
2829

29-
try (PDDocument originalDocument = PDDocument.load(splitQuery.getFile())) {
30+
try (PDDocument originalDocument = Loader.loadPDF(splitQuery.getFile())) {
3031
try (PDDocument splitDocument = new PDDocument()) {
3132
int totalOriginalPages = countPages(splitQuery.getFile());
3233

@@ -73,7 +74,7 @@ private List<Integer> getPageRanges(PageOptions pageOptions, Integer numberOfPag
7374
private boolean checkPdfOpen(byte[] documentFile) {
7475
boolean opens = false;
7576
try {
76-
PDDocument.load(documentFile).close();
77+
Loader.loadPDF(documentFile).close();
7778
opens = true;
7879
} catch (IOException e) {
7980
e.printStackTrace();
@@ -82,7 +83,7 @@ private boolean checkPdfOpen(byte[] documentFile) {
8283
}
8384

8485
private int countPages(byte[] documentFile) throws IOException {
85-
PDDocument document = PDDocument.load(documentFile);
86+
PDDocument document = Loader.loadPDF(documentFile);
8687
int pageCount = document.getNumberOfPages();
8788
document.close();
8889
return pageCount;

src/main/java/com/mindee/pdf/PdfCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import java.awt.image.BufferedImage;
77
import java.io.IOException;
8+
import org.apache.pdfbox.Loader;
89
import org.apache.pdfbox.pdmodel.PDDocument;
910
import org.apache.pdfbox.pdmodel.PDPage;
1011
import org.apache.pdfbox.pdmodel.PDPageContentStream;
@@ -38,7 +39,7 @@ public static byte[] compressPdf(
3839
"MINDEE WARNING: Found text inside of the provided PDF file. Compression operation aborted.");
3940
return pdfData;
4041
}
41-
try (PDDocument inputDoc = PDDocument.load(pdfData);
42+
try (PDDocument inputDoc = Loader.loadPDF(pdfData);
4243
PDDocument outputDoc = new PDDocument()) {
4344

4445
PDFRenderer pdfRenderer = new PDFRenderer(inputDoc);

src/test/java/com/mindee/input/LocalInputSourceTest.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import java.util.stream.Collectors;
1515
import javax.imageio.ImageIO;
1616
import org.apache.commons.codec.binary.Base64;
17+
import org.apache.pdfbox.Loader;
1718
import org.apache.pdfbox.pdmodel.PDDocument;
1819
import org.apache.pdfbox.text.PDFTextStripper;
1920
import org.junit.jupiter.api.Assertions;
@@ -338,8 +339,8 @@ public void testPdfResizeWithTextKeepsText() throws IOException {
338339
byte[] compressedWithText =
339340
PdfCompressor.compressPdf(initialWithText.getFile(), 100, true, false);
340341

341-
PDDocument originalDoc = PDDocument.load(initialWithText.getFile());
342-
PDDocument compressedDoc = PDDocument.load(compressedWithText);
342+
PDDocument originalDoc = Loader.loadPDF(initialWithText.getFile());
343+
PDDocument compressedDoc = Loader.loadPDF(compressedWithText);
343344

344345
Assertions.assertEquals(originalDoc.getNumberOfPages(), compressedDoc.getNumberOfPages());
345346
Assertions.assertNotEquals(originalDoc.hashCode(), compressedDoc.hashCode());

src/test/java/com/mindee/pdf/PDFUtilsTest.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import java.util.Arrays;
1111
import java.util.List;
1212
import java.util.Random;
13+
14+
import org.apache.pdfbox.Loader;
1315
import org.apache.pdfbox.pdmodel.PDDocument;
1416
import org.apache.pdfbox.pdmodel.PDPage;
1517
import org.junit.jupiter.api.Assertions;
@@ -43,7 +45,7 @@ public void givenADocumentAndListOfPages_whenMerged_thenReturnsCorrectDocument()
4345
File file = new File("src/test/resources/output/fileToTest.pdf");
4446
List<Integer> pageList = Arrays.asList(0, 2, 3, 1, 10, 2, 1);
4547
byte[] newPdf = PDFUtils.mergePdfPages(file, pageList);
46-
PDDocument document = PDDocument.load(newPdf);
48+
PDDocument document = Loader.loadPDF(newPdf);
4749

4850
Assertions.assertEquals(7, document.getNumberOfPages());
4951
document.close();

0 commit comments

Comments
 (0)