Skip to content

Commit 6886a08

Browse files
committed
extract embedded file.
This will not work for file attachments.
1 parent 3a9bb72 commit 6886a08

File tree

1 file changed

+45
-0
lines changed

1 file changed

+45
-0
lines changed

test/helpers.jl

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,48 @@ function pdfhlp_extract_doc_content_to_dir(filename,dir=tempdir())
3333
pdDocClose(doc)
3434
end
3535
end
36+
37+
"""
    pdfhlp_extract_doc_embedded_files(filename, dir=tempdir())

Extract the files embedded in the PDF document `filename` into a fresh
directory `joinpath(dir, <last path component of filename>)`.

Any existing directory at that location is removed first. Each embedded file
is written under the name stored in the `F` entry of its file specification.

Only the simple layout is supported, where all entries sit in the `Names`
array of the root node of the `EmbeddedFiles` name tree. A name tree that uses
intermediate (`Kids`) nodes is not traversed. Documents without a names
dictionary, an `EmbeddedFiles` entry or a root `Names` array produce an empty
directory.
"""
function pdfhlp_extract_doc_embedded_files(filename,dir=tempdir())
    # The output directory is named after the last '/'-separated path component.
    filenm = rsplit(filename, '/', limit=2)[end]
    dirpath = joinpath(dir, filenm)
    if isdir(dirpath)
        rm(dirpath; force=true, recursive=true)
    end
    mkdir(dirpath)
    doc = pdDocOpen(filename)
    try
        catalog = pdDocGetCatalog(doc)
        names = get(catalog, CosName("Names"))
        cosDoc = pdDocGetCosDoc(doc)
        nmdict = cosDocGetObject(cosDoc, names)
        @debug "Names dictionary" nmdict
        if nmdict !== CosNull
            efref = get(nmdict, CosName("EmbeddedFiles"))
            efroot = cosDocGetObject(cosDoc, efref)
            # Simple case: no tree, just a few files attached in the root node.
            # A proper implementation needs full names tree traversal.
            efarr = efroot !== CosNull ? get(efroot, CosName("Names")) : CosNull
            if efarr !== CosNull
                data = get(efarr)
                len = length(data)
                @debug "EmbeddedFiles root Names array" len
                # The array holds alternating (name, file specification) entries:
                # step by 2 and stop before a dangling trailing key, if any.
                for i in 1:2:len-1
                    key = data[i]
                    @debug "Extracting embedded file" key
                    val = data[i+1]
                    filespec = cosDocGetObject(cosDoc, val)
                    ef = get(filespec, CosName("EF"))
                    # UF could be there as well. Kept in a separate local so the
                    # `filename` argument is not overwritten.
                    attname = get(filespec, CosName("F"))
                    stmref = get(ef, CosName("F"))
                    stm = cosDocGetObject(cosDoc, stmref)
                    bufstm = decode(stm)
                    buf = try
                        read(bufstm)
                    finally
                        # Release the decoded stream even when reading fails.
                        close(bufstm)
                    end
                    path = joinpath(dirpath, get(attname))
                    write(path, buf)
                end
            end
        end
    finally
        pdDocClose(doc)
    end
end

0 commit comments

Comments
 (0)