Skip to content

Commit 55f75cb

Browse files
committed
Temporary solutions for annotation attachments
1. CosObjects are exposed from PDDoc and PDPage. 2. xref entries that would override an already-recorded entry are ignored. 3. Helper enhanced to extract FileAttachment annots.
1 parent 6886a08 commit 55f75cb

File tree

5 files changed

+70
-3
lines changed

5 files changed

+70
-3
lines changed

src/CosDoc.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,9 @@ function read_xref_table(ps::BufferedInputStream, doc::CosDocImpl)
275275
if (v[18] != LATIN_F)
276276
ref = CosIndirectObjectRef(oid, parse(Int,String(v[12:16])))
277277

278-
doc.xref[ref] = CosObjectLoc(parse(Int,String(v[1:10])))
278+
if !haskey(doc.xref,ref)
279+
doc.xref[ref] = CosObjectLoc(parse(Int,String(v[1:10])))
280+
end
279281
end
280282

281283
oid +=1

src/CosObjStream.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,9 @@ function read_xref_stream(xrefstm::CosObject,
8080
it += recsize
8181
if (record[1] != 0)
8282
count_record +=1
83-
xref[ref]=CosObjectLoc(loc,stm)
83+
if !haskey(xref,ref)
84+
xref[ref]=CosObjectLoc(loc,stm)
85+
end
8486
end
8587
end
8688
end

src/PDDoc.jl

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
export PDDoc,
22
pdDocOpen,
33
pdDocClose,
4+
pdDocGetCatalog,
5+
pdDocGetCosDoc,
46
pdDocGetPageCount,
57
pdDocGetPage
68

@@ -9,6 +11,7 @@ export PDDoc,
911
"""
    pdDocOpen(fp::String)

Create a document object for `fp` and return it.

The object is constructed with `PDDocImpl(fp)`; `update_page_tree` and then
`update_structure_tree` are applied to it before it is handed back.
"""
function pdDocOpen(fp::String)
    pddoc = PDDocImpl(fp)
    # Order matters only in that both refresh steps run before the caller
    # sees the document.
    update_page_tree(pddoc)
    update_structure_tree(pddoc)
    return pddoc
end
1417

@@ -24,6 +27,8 @@ function pdDocGetCatalog(doc::PDDoc)
2427
return doc.catalog
2528
end
2629

30+
"""
    pdDocGetCosDoc(doc::PDDoc)

Return the value stored in the `cosDoc` field of `doc`.
"""
function pdDocGetCosDoc(doc::PDDoc)
    return doc.cosDoc
end
31+
2732
function pdDocGetPage(doc::PDDoc, num::Int)
2833
cosobj = find_page_from_treenode(doc.pages, num)
2934
return create_pdpage(doc, cosobj)
@@ -38,10 +43,11 @@ end
3843
cosDoc::CosDoc
3944
catalog::CosObject
4045
pages::CosObject
46+
isTagged::Symbol #Valid values :tagged, :none and :suspect
4147
function PDDocImpl(fp::String)
4248
cosDoc = cosDocOpen(fp)
4349
catalog = cosDocGetRoot(cosDoc)
44-
new(cosDoc,catalog,CosNull)
50+
new(cosDoc,catalog,CosNull,:none)
4551
end
4652
end
4753

@@ -118,3 +124,15 @@ function find_page_from_treenode(node::CosObject, pageno::Int)
118124
end
119125
end
120126
end
127+
128+
"""
    update_structure_tree(doc::PDDocImpl)

Set `doc.isTagged` from the `MarkInfo` entry of the document catalog.

When the catalog has no `MarkInfo` entry (the lookup yields `CosNull`) the
document is left untouched and `nothing` is returned. Otherwise `doc.isTagged`
becomes `:suspect` if `Suspect` is `CosTrue`, else `:tagged` if `Marked` is
`CosTrue`, else `:none`; the assigned symbol is returned.
"""
function update_structure_tree(doc::PDDocImpl)
    markinfo = get(pdDocGetCatalog(doc), CosName("MarkInfo"))
    markinfo === CosNull && return nothing

    marked = get(markinfo, CosName("Marked"))
    suspect = get(markinfo, CosName("Suspect"))

    # `Suspect` takes precedence over `Marked`.
    status = if suspect === CosTrue
        :suspect
    elseif marked === CosTrue
        :tagged
    else
        :none
    end
    doc.isTagged = status
    return status
end

src/PDPage.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
export PDPage,
22
pdPageGetContents,
33
pdPageIsEmpty,
4+
pdPageGetCosObject,
45
pdPageGetContentObjects
56

67
@compat abstract type PDPage end
@@ -14,6 +15,8 @@ export PDPage,
1415
new(doc, cospage, contents, Nullable{PDPageObjectGroup}())
1516
end
1617

18+
"""
    pdPageGetCosObject(page)

Return the value stored in the `cospage` field of `page`.
"""
function pdPageGetCosObject(page)
    return page.cospage
end
19+
1720
function pdPageGetContents(page::PDPageImpl)
1821
if (page.contents === CosNull)
1922
ref = get_page_content_ref(page)

test/helpers.jl

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,45 @@ function pdfhlp_extract_doc_embedded_files(filename,dir=tempdir())
7878
pdDocClose(doc)
7979
end
8080
end
81+
82+
"""
    pdfhlp_extract_doc_attachment_files(filename, dir=tempdir())

Write the files attached through `FileAttachment` annotations of the PDF
`filename` into a directory named after the base name of `filename` under `dir`.

That output directory is removed (recursively) if it already exists and is then
recreated. Every page is visited; for each entry of the page's `Annots` array
whose `Subtype` is `FileAttachment`, the stream referenced by `FS -> EF -> F`
is decoded and written under the base name of the `FS -> F` entry.

The document is always closed before returning, even when extraction fails.

# Throws
- `ArgumentError` if `filename` has an empty base name (for example `""` or a
  path ending in a separator), because the output directory would then be `dir`
  itself and would be deleted.
"""
function pdfhlp_extract_doc_attachment_files(filename,dir=tempdir())
    filenm = basename(filename)
    # An empty base name would make `dirpath` resolve to `dir` itself, which is
    # removed recursively below.
    if isempty(filenm)
        throw(ArgumentError("filename must name a file, got \"$filename\""))
    end
    dirpath = joinpath(dir, filenm)
    if isdir(dirpath)
        rm(dirpath; force=true, recursive=true)
    end
    mkdir(dirpath)
    doc = pdDocOpen(filename)
    cosDoc = pdDocGetCosDoc(doc)
    try
        npage = pdDocGetPageCount(doc)
        for i in 1:npage
            page = pdDocGetPage(doc, i)
            cospage = pdPageGetCosObject(page)
            annots = cosDocGetObject(cosDoc, get(cospage, CosName("Annots")))
            if (annots === CosNull)
                continue
            end
            for annot in get(annots)
                annotdict = cosDocGetObject(cosDoc, annot)
                subtype = get(annotdict, CosName("Subtype"))
                if (subtype == CosName("FileAttachment"))
                    filespec = cosDocGetObject(cosDoc, get(annotdict, CosName("FS")))
                    ef = get(filespec, CosName("EF"))
                    # Kept separate from the `filename` argument so the input
                    # path is not clobbered inside the loop.
                    attname = get(filespec, CosName("F")) #UF could be there as well.
                    stmref = get(ef, CosName("F"))
                    stm = cosDocGetObject(cosDoc, stmref)
                    bufstm = decode(stm)
                    local buf
                    try
                        buf = read(bufstm)
                    finally
                        # Release the decoded stream even if reading fails.
                        close(bufstm)
                    end
                    # The name comes from the PDF (untrusted): keep only its
                    # last path component so an absolute or nested name cannot
                    # place the file outside `dirpath`.
                    path = joinpath(dirpath, basename(get(attname)))
                    write(path, buf)
                end
            end
        end
    finally
        pdDocClose(doc)
    end
end

0 commit comments

Comments
 (0)