@@ -90,7 +90,7 @@ The above mentioned code takes a PDF file `src` as input and writes the text dat
90
90
``` julia {.line_numbers}
91
91
"""
92
92
```
93
- getPDFText(src, out) -> Dict
93
+ getPDFText(src, out) -> Dict
94
94
```
95
95
- src - Input PDF file from where text is to be extracted
96
96
- out - Output TXT file where the output will be written
@@ -99,27 +99,27 @@ return - A dictionary containing metadata of the document
99
99
function getPDFText (src, out)
100
100
# Handle that can be used for subsequent operations on the document.
101
101
doc = pdDocOpen (src)
102
-
102
+
103
103
# Metadata extracted from the PDF document.
104
- # This value is retained and returned as the return from the function.
104
+ # This value is retained and returned as the return from the function.
105
105
docinfo = pdDocGetInfo (doc)
106
106
open (out, " w" ) do io
107
-
108
- # Returns number of pages in the document
109
- npage = pdDocGetPageCount (doc)
107
+
108
+ # Returns number of pages in the document
109
+ npage = pdDocGetPageCount (doc)
110
110
111
111
for i= 1 : npage
112
-
112
+
113
113
# Handle to the page at index `i`.
114
114
page = pdDocGetPage (doc, i)
115
-
116
- # Extract text from the page and write it to the output file.
115
+
116
+ # Extract text from the page and write it to the output file.
117
117
pdPageExtractText (io, page)
118
118
119
- end
119
+ end
120
120
end
121
- # Close the document handle.
122
- # The doc handle should not be used after this call
121
+ # Close the document handle.
122
+ # The doc handle should not be used after this call
123
123
pdDocClose (doc)
124
124
return docinfo
125
125
end
0 commit comments