5
5
import java .io .ByteArrayOutputStream ;
6
6
import java .io .File ;
7
7
import java .io .IOException ;
8
- import java .io .InputStream ;
9
8
import java .util .ArrayList ;
10
9
import java .util .List ;
11
10
import org .apache .pdfbox .cos .COSName ;
21
20
*/
22
21
public final class PDFUtils {
23
22
24
- private PDFUtils () {
25
- }
23
+ private PDFUtils () {}
26
24
27
- private static int countPDDocumentPages (PDDocument document ) throws IOException {
25
+ /**
26
+ * Get the number of pages in the PDF.
27
+ * @param inputSource The PDF file.
28
+ */
29
+ public static int getNumberOfPages (LocalInputSource inputSource ) throws IOException {
30
+ PDDocument document = PDDocument .load (inputSource .getFile ());
28
31
int pageCount = document .getNumberOfPages ();
29
32
document .close ();
30
33
return pageCount ;
31
34
}
32
35
33
- public static int countPdfPages (InputStream inputStream ) throws IOException {
34
- try {
35
- PDDocument document = PDDocument .load (inputStream );
36
- int pageCount = countPDDocumentPages (document );
37
- document .close ();
38
- return pageCount ;
39
- } finally {
40
- inputStream .close ();
41
- }
42
- }
43
-
44
36
private static byte [] createPdfFromExistingPdf (
45
37
PDDocument document ,
46
38
List <Integer > pageNumbers
@@ -61,6 +53,11 @@ private static byte[] createPdfFromExistingPdf(
61
53
return output ;
62
54
}
63
55
56
+ /**
57
+ * Merge specified PDF pages together.
58
+ * @param file The PDF file.
59
+ * @param pageNumbers List of page numbers to merge together.
60
+ */
64
61
public static byte [] mergePdfPages (
65
62
File file ,
66
63
List <Integer > pageNumbers
@@ -74,7 +71,6 @@ public static boolean isPdfEmpty(File file) throws IOException {
74
71
}
75
72
76
73
private static boolean checkIfPdfIsEmpty (PDDocument document ) throws IOException {
77
-
78
74
boolean isEmpty = true ;
79
75
for (PDPage page : document .getPages ()) {
80
76
PDResources resources = page .getResources ();
@@ -97,29 +93,80 @@ private static boolean checkIfPdfIsEmpty(PDDocument document) throws IOException
97
93
return isEmpty ;
98
94
}
99
95
96
+ /**
97
+ * Render all pages of a PDF as images.
98
+ * Converting PDFs with hundreds of pages may result in a heap space error.
99
+ * @param filePath The path to the PDF file.
100
+ * @return List of all pages as images.
101
+ */
100
102
public static List <PdfPageImage > pdfToImages (String filePath ) throws IOException {
101
103
return pdfToImages (new LocalInputSource (filePath ));
102
104
}
103
105
106
+ /**
107
+ * Render all pages of a PDF as images.
108
+ * Converting PDFs with hundreds of pages may result in a heap space error.
109
+ * @param source The PDF file.
110
+ * @return List of all pages as images.
111
+ */
104
112
public static List <PdfPageImage > pdfToImages (LocalInputSource source ) throws IOException {
105
113
PDDocument document = PDDocument .load (source .getFile ());
106
114
PDFRenderer pdfRenderer = new PDFRenderer (document );
107
115
List <PdfPageImage > pdfPageImages = new ArrayList <>();
108
116
for (int i = 0 ; i < document .getNumberOfPages (); i ++) {
109
- PDRectangle bbox = document .getPage (i ).getBBox ();
110
- float dimension = bbox .getWidth () * bbox .getHeight ();
111
- int dpi ;
112
- if (dimension < 200000 ) {
113
- dpi = 300 ;
114
- } else if (dimension < 300000 ) {
115
- dpi = 250 ;
116
- } else {
117
- dpi = 200 ;
118
- }
119
- BufferedImage imageBuffer = pdfRenderer .renderImageWithDPI (i , dpi , ImageType .RGB );
117
+ BufferedImage imageBuffer = pdfPageToImageBuffer (i , document , pdfRenderer );
120
118
pdfPageImages .add (new PdfPageImage (imageBuffer , i , source .getFilename (), "jpg" ));
121
119
}
122
120
document .close ();
123
121
return pdfPageImages ;
124
122
}
123
+
124
+ /**
125
+ * Render a single page of a PDF as an image.
126
+ * Main use case is for processing PDFs with hundreds of pages.
127
+ * If you need to only render some pages from the PDF, use <code>mergePdfPages</code> and then <code>pdfToImages</code>.
128
+ * @param filePath The path to the PDF file.
129
+ * @param pageNumber The page number to render, first page is 1.
130
+ * @return The page as an image.
131
+ */
132
+ public static PdfPageImage pdfPageToImage (String filePath , int pageNumber ) throws IOException {
133
+ return pdfPageToImage (new LocalInputSource (filePath ), pageNumber );
134
+ }
135
+
136
+ /**
137
+ * Render a single page of a PDF as an image.
138
+ * Main use case is for processing PDFs with hundreds of pages.
139
+ * If you need to only render some pages from the PDF, use <code>mergePdfPages</code> and then <code>pdfToImages</code>.
140
+ * @param source The PDF file.
141
+ * @param pageNumber The page number to render, first page is 1.
142
+ * @return The page as an image.
143
+ */
144
+ public static PdfPageImage pdfPageToImage (
145
+ LocalInputSource source ,
146
+ int pageNumber
147
+ ) throws IOException {
148
+ int index = pageNumber - 1 ;
149
+ PDDocument document = PDDocument .load (source .getFile ());
150
+ PDFRenderer pdfRenderer = new PDFRenderer (document );
151
+ BufferedImage imageBuffer = pdfPageToImageBuffer (index , document , pdfRenderer );
152
+ return new PdfPageImage (imageBuffer , index , source .getFilename (), "jpg" );
153
+ }
154
+
155
+ private static BufferedImage pdfPageToImageBuffer (
156
+ int index ,
157
+ PDDocument document ,
158
+ PDFRenderer pdfRenderer
159
+ ) throws IOException {
160
+ PDRectangle bbox = document .getPage (index ).getBBox ();
161
+ float dimension = bbox .getWidth () * bbox .getHeight ();
162
+ int dpi ;
163
+ if (dimension < 200000 ) {
164
+ dpi = 300 ;
165
+ } else if (dimension < 300000 ) {
166
+ dpi = 250 ;
167
+ } else {
168
+ dpi = 200 ;
169
+ }
170
+ return pdfRenderer .renderImageWithDPI (index , dpi , ImageType .RGB );
171
+ }
125
172
}
0 commit comments