@@ -33,3 +33,48 @@ function pdfhlp_extract_doc_content_to_dir(filename,dir=tempdir())
33
33
pdDocClose (doc)
34
34
end
35
35
end
36
+
37
# Extract the files attached to the PDF document `filename` into a directory
# named after the PDF, created under `dir` (defaults to `tempdir()`).
#
# Any existing directory of that name is removed first, so every call starts
# from an empty output directory. The document is always closed, even when
# extraction fails part-way. The function evaluates to `nothing`.
#
# Only the simple layout is handled: all attachments listed in the `Names`
# array of the root node of the EmbeddedFiles name tree. A proper
# implementation needs a full name-tree traversal.
function pdfhlp_extract_doc_embedded_files(filename, dir=tempdir())
    # basename copes with platform path separators, unlike splitting on '/'.
    dirpath = joinpath(dir, basename(filename))
    if isdir(dirpath)
        rm(dirpath; force=true, recursive=true)
    end
    mkdir(dirpath)
    doc = pdDocOpen(filename)
    try
        catalog = pdDocGetCatalog(doc)
        names = get(catalog, CosName("Names"))
        cosDoc = pdDocGetCosDoc(doc)
        nmdict = cosDocGetObject(cosDoc, names)
        println(nmdict)  # diagnostic output
        if nmdict !== CosNull
            efref = get(nmdict, CosName("EmbeddedFiles"))
            efroot = cosDocGetObject(cosDoc, efref)
            # Simple case: no tree, just a few files attached in the root node.
            efarr = get(efroot, CosName("Names"))
            data = get(efarr)
            len = length(data)
            println(len)  # diagnostic output
            # The array is a flat sequence of (key, file specification) pairs.
            # Julia ranges are start:step:stop, so step by 2 up to the last
            # index that still has a partner at i + 1.
            for i = 1:2:len-1
                key = data[i]
                println(key)  # diagnostic output
                val = data[i+1]
                filespec = cosDocGetObject(cosDoc, val)
                ef = get(filespec, CosName("EF"))
                # Kept in its own local so the `filename` argument is not
                # overwritten. UF could be there as well.
                fname = get(filespec, CosName("F"))
                stmref = get(ef, CosName("F"))
                stm = cosDocGetObject(cosDoc, stmref)
                bufstm = decode(stm)
                buf = try
                    read(bufstm)
                finally
                    # Release the decoded stream even if reading fails.
                    close(bufstm)
                end
                # NOTE(review): the attachment name comes from the document and
                # is used unsanitized; confirm it cannot point outside dirpath.
                path = joinpath(dirpath, get(fname))
                write(path, buf)
            end
        end
    finally
        pdDocClose(doc)
    end
end
0 commit comments