Skip to content

Commit 229cdb4

Browse files
committed
getData formats data in a usable way, update_info implemented
1 parent 6118b75 commit 229cdb4

File tree

7 files changed

+408
-5
lines changed

7 files changed

+408
-5
lines changed

src/InfoFields.php

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
<?php
2+
3+
namespace mikehaertl\pdftk;
4+
5+
use ArrayObject;
6+
7+
/**
 * Class InfoFields
 * Derived from DataFields
 *
 * Wraps the raw text produced by pdftk's dump_data operation and exposes
 * the parsed result through the ArrayObject interface.
 *
 * @author Burak USGURLU <burak@uskur.com.tr>
 * @license http://www.opensource.org/licenses/MIT
 */
class InfoFields extends ArrayObject
{
    /**
     * @var string the raw, unparsed dump_data output
     */
    private $_string;

    /**
     * @var array the parsed representation of $_string
     */
    private $_array;

    /**
     * InfoFields constructor.
     *
     * @param string|null $input the raw dump_data output. Null is treated
     * as an empty string.
     * @param int $flags flags passed through to ArrayObject
     * @param string $iterator_class iterator class passed through to
     * ArrayObject
     * @throws \Exception if a Bookmark or PageMedia block contains an
     * unexpected line
     */
    public function __construct($input = null, $flags = 0, $iterator_class = "ArrayIterator")
    {
        $this->_string = $input === null ? '' : (string) $input;
        $this->_array = $this->parseData($this->_string);

        // Constructors have no return value, so just delegate.
        parent::__construct($this->_array, $flags, $iterator_class);
    }

    /**
     * @return string the raw input this object was created from
     */
    public function __toString()
    {
        return $this->_string;
    }

    /**
     * Note: despite the name this is not a PHP magic method and must be
     * called explicitly.
     *
     * @return array the parsed data
     */
    public function __toArray()
    {
        return $this->_array;
    }

    /**
     * Parse the output of dump_data into something usable.
     *
     * Example input:
     *
     * InfoBegin
     * InfoKey: Creator
     * InfoValue: Adobe Acrobat Pro DC 15.0
     * InfoBegin
     * InfoKey: Producer
     * InfoValue: XYZ
     * PdfID0: 1fdce9ed1153ab4c973334b512a67997
     * PdfID1: c7acc878cda02ad7bb401fa8080a8929
     * NumberOfPages: 11
     * BookmarkBegin
     * BookmarkTitle: First bookmark
     * BookmarkLevel: 1
     * BookmarkPageNumber: 1
     *
     * The result always contains the keys 'Info' (InfoKey => InfoValue),
     * 'Bookmark' and 'PageMedia' (lists of records whose keys have the
     * block prefix removed). Every other "Key: Value" line becomes a top
     * level entry.
     *
     * @param string $dataString the raw dump_data output
     * @return array the parsed data
     * @throws \Exception if a line that does not belong to the current
     * Bookmark/PageMedia block appears before that block is complete
     */
    private function parseData($dataString)
    {
        $output = array('Info' => array(), 'Bookmark' => array(), 'PageMedia' => array());
        $expectType = null;
        $buffer = array();

        // Do not rely on PHP_EOL: the dump may use a different newline
        // style than the platform PHP runs on.
        foreach (preg_split('/\r\n|\r|\n/', $dataString) as $line) {
            $trimmedLine = trim($line);
            if ($trimmedLine === '') {
                continue;
            }

            if (preg_match('/^(Info|Bookmark|PageMedia)Begin$/', $trimmedLine, $match)) {
                // A new block starts: store the pending record if it is
                // complete and always start from a clean buffer so that no
                // stale fields leak into the next record.
                if ($this->isCompleteRecord($expectType, $buffer)) {
                    $output[$expectType][] = $buffer;
                }
                $expectType = $match[1];
                $buffer = array();
                continue;
            }

            if (preg_match('/([^:]*): ?(.*)/', $trimmedLine, $match)) {
                $key = $match[1];
                $value = $match[2];
            } else {
                // Not a "Key: Value" line
                $key = '';
                $value = '';
            }

            if ($expectType === 'Info') {
                if ($key === 'InfoKey') {
                    $buffer['Key'] = $value;
                } elseif ($key === 'InfoValue') {
                    $buffer['Value'] = $value;
                }
                if (isset($buffer['Key'], $buffer['Value'])) {
                    $output['Info'][$buffer['Key']] = $buffer['Value'];
                    $buffer = array();
                    $expectType = null;
                }
                continue;
            }

            if ($expectType !== null) {
                if ($key !== '' && strpos($key, $expectType) === 0) {
                    // Strip only the leading block prefix. The record is
                    // kept open so that additional fields (e.g.
                    // PageMediaCropRect) stay attached to it.
                    $buffer[substr($key, strlen($expectType))] = $value;
                    continue;
                }
                if (!$this->isCompleteRecord($expectType, $buffer)) {
                    throw new \Exception("Unexpected input");
                }
                // A foreign line ends the complete record. The line itself
                // is handled as a top level entry below.
                $output[$expectType][] = $buffer;
                $buffer = array();
                $expectType = null;
            }

            if ($key !== '') {
                $output[$key] = $value;
            }
        }

        // Store a record that was still open at the end of the input
        if ($this->isCompleteRecord($expectType, $buffer)) {
            $output[$expectType][] = $buffer;
        }

        return $output;
    }

    /**
     * Check whether a buffered Bookmark or PageMedia record has all the
     * fields required to be added to the output.
     *
     * @param string|null $type the current block type
     * @param array $record the fields collected so far
     * @return bool whether the record is a complete Bookmark or PageMedia
     * record. Always false for any other type.
     */
    private function isCompleteRecord($type, array $record)
    {
        $required = array(
            'Bookmark' => array('Level', 'Title', 'PageNumber'),
            'PageMedia' => array('Number', 'Rotation', 'Rect', 'Dimensions'),
        );
        if ($type === null || !isset($required[$type])) {
            return false;
        }
        foreach ($required[$type] as $field) {
            if (!isset($record[$field])) {
                return false;
            }
        }
        return true;
    }
}

src/InfoFile.php

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
<?php
2+
namespace mikehaertl\pdftk;
3+
4+
use mikehaertl\tmp\File;
5+
6+
/**
 * InfoFile
 *
 * This class represents a temporary dump_data compatible file that can be
 * used to update meta data of PDF with valid unicode characters.
 *
 * @author Burak Usgurlu <burak@uskur.com.tr>
 * @license http://www.opensource.org/licenses/MIT
 */
class InfoFile extends File
{
    /**
     * Constructor
     *
     * @param array $data the meta data as name => value
     * @param string|null $suffix the optional suffix for the tmp file. If
     * null '.txt' is used.
     * @param string|null $prefix the optional prefix for the tmp file. If
     * null 'php_pdftk_info_' is used.
     * @param string|null $directory directory where the file should be
     * created. Autodetected if not provided.
     * @param string|null $encoding of the data. Default is 'UTF-8'.
     * @throws \Exception if the MB extension is missing or the temporary
     * file could not be created or opened
     */
    public function __construct($data, $suffix = null, $prefix = null, $directory = null, $encoding = 'UTF-8')
    {
        // Check the requirement first, so that no temporary file is
        // created if we have to bail out anyway.
        if (!function_exists('mb_convert_encoding')) {
            throw new \Exception('MB extension required.');
        }

        if ($directory === null) {
            $directory = self::getTempDir();
        }
        // Only fall back to the defaults if the caller did not pass a value
        if ($suffix === null) {
            $suffix = '.txt';
        }
        if ($prefix === null) {
            $prefix = 'php_pdftk_info_';
        }

        $this->_fileName = tempnam($directory, $prefix);
        if ($this->_fileName === false) {
            throw new \Exception('Could not create temporary info file.');
        }
        $newName = $this->_fileName . $suffix;
        rename($this->_fileName, $newName);
        $this->_fileName = $newName;

        $fields = '';
        foreach ($data as $key => $value) {
            // Always convert to UTF-8
            if ($encoding !== 'UTF-8') {
                $value = mb_convert_encoding($value, 'UTF-8', $encoding);
                $key = mb_convert_encoding($key, 'UTF-8', $encoding);
                // Escape each variable from itself. The key and value must
                // not be mixed up here, or the value would be lost.
                $value = defined('ENT_XML1') ? htmlspecialchars($value, ENT_XML1, 'UTF-8') : htmlspecialchars($value);
                $key = defined('ENT_XML1') ? htmlspecialchars($key, ENT_XML1, 'UTF-8') : htmlspecialchars($key);
            }

            $fields .= "InfoBegin\nInfoKey: $key\nInfoValue: $value\n";
        }

        // Use fwrite, since file_put_contents() messes around with character encoding
        $fp = fopen($this->_fileName, 'w');
        if ($fp === false) {
            throw new \Exception('Could not open temporary info file for writing.');
        }
        fwrite($fp, $fields);
        fclose($fp);
    }
}

src/Pdf.php

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,27 @@ public function fillForm($data, $encoding = 'UTF-8', $dropXfa = true, $format =
263263
}
264264
return $this;
265265
}
266+
267+
/**
 * Update the meta data of the PDF.
 *
 * An array is first written to an InfoFile. Any other value is passed on
 * to the command unchanged. The operation is 'update_info_utf8' if the
 * encoding is 'UTF-8' and 'update_info' otherwise.
 *
 * @param string|array $data either an InfoFile filename or an array with
 * the meta data (name => value)
 * @param string $encoding the encoding of the data. Default is 'UTF-8'.
 * @return \mikehaertl\pdftk\Pdf the pdf instance for method chaining
 */
public function updateInfo($data, $encoding = 'UTF-8')
{
    $this->constrainSingleFile();

    $infoSource = is_array($data)
        ? new \mikehaertl\pdftk\InfoFile($data, null, null, null, $encoding)
        : $data;

    $command = $this->getCommand();
    $operation = $encoding == 'UTF-8' ? 'update_info_utf8' : 'update_info';
    $command
        ->setOperation($operation)
        ->setOperationArgument($infoSource, true);

    return $this;
}
266287

267288
/**
268289
* Apply a PDF as watermark to the background of a single PDF file.
@@ -340,7 +361,7 @@ public function multiStamp($file)
340361
/**
341362
* @param bool $utf8 whether to dump the data UTF-8 encoded. Default is
342363
* true.
343-
* @return string|bool meta data about the PDF or false on failure
364+
* @return InfoFields|bool meta data about the PDF or false on failure
344365
*/
345366
public function getData($utf8 = true)
346367
{
@@ -351,7 +372,7 @@ public function getData($utf8 = true)
351372
if (!$command->execute()) {
352373
return false;
353374
} else {
354-
$this->$property = trim($command->getOutput());
375+
$this->$property = new InfoFields(trim($command->getOutput()));
355376
}
356377
}
357378
return $this->$property;

tests/InfoFieldsTest.php

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
<?php
2+
use mikehaertl\pdftk\InfoFields;
3+
4+
class InfoFieldsTest extends \PHPUnit\Framework\TestCase
{
    /**
     * @var string sample dump_data output: three Info entries, three top
     * level values and five PageMedia records
     */
    protected $_testInput = <<<EOD
InfoBegin
InfoKey: CreationDate
InfoValue: D:20140709121536+02'00'
InfoBegin
InfoKey: Creator
InfoValue: Writer
InfoBegin
InfoKey: Producer
InfoValue: LibreOffice 4.2
PdfID0: 8b93f76a0b28b720d0dee9a6eb2a780a
PdfID1: 8b93f76a0b28b720d0dee9a6eb2a780a
NumberOfPages: 5
PageMediaBegin
PageMediaNumber: 1
PageMediaRotation: 0
PageMediaRect: 0 0 595 842
PageMediaDimensions: 595 842
PageMediaBegin
PageMediaNumber: 2
PageMediaRotation: 0
PageMediaRect: 0 0 595 842
PageMediaDimensions: 595 842
PageMediaBegin
PageMediaNumber: 3
PageMediaRotation: 0
PageMediaRect: 0 0 595 842
PageMediaDimensions: 595 842
PageMediaBegin
PageMediaNumber: 4
PageMediaRotation: 0
PageMediaRect: 0 0 595 842
PageMediaDimensions: 595 842
PageMediaBegin
PageMediaNumber: 5
PageMediaRotation: 0
PageMediaRect: 0 0 595 842
PageMediaDimensions: 595 842
EOD;

    /**
     * @var array the structure expected from parsing $_testInput
     */
    protected $_parsedResult = array(
        "Info" => array(
            "CreationDate" => "D:20140709121536+02'00'",
            "Creator" => "Writer",
            "Producer" => "LibreOffice 4.2"
        ),
        "PdfID0" => "8b93f76a0b28b720d0dee9a6eb2a780a",
        "PdfID1" => "8b93f76a0b28b720d0dee9a6eb2a780a",
        "NumberOfPages" => "5",
        "Bookmark" => array(),
        "PageMedia" => array(
            array(
                "Number" => "1",
                "Rotation" => "0",
                "Rect" => "0 0 595 842",
                "Dimensions" => "595 842"
            ),
            array(
                "Number" => "2",
                "Rotation" => "0",
                "Rect" => "0 0 595 842",
                "Dimensions" => "595 842"
            ),
            array(
                "Number" => "3",
                "Rotation" => "0",
                "Rect" => "0 0 595 842",
                "Dimensions" => "595 842"
            ),
            array(
                "Number" => "4",
                "Rotation" => "0",
                "Rect" => "0 0 595 842",
                "Dimensions" => "595 842"
            ),
            array(
                "Number" => "5",
                "Rotation" => "0",
                "Rect" => "0 0 595 842",
                "Dimensions" => "595 842"
            ),
        )
    );

    /**
     * The sample dump must be parsed into the expected nested array.
     */
    public function testInfoFieldParsing()
    {
        $parsed = new InfoFields($this->_testInput);
        $actual = $parsed->__toArray();

        $this->assertEquals($this->_parsedResult, $actual);
    }
}

tests/InfoFileTest.php

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<?php
2+
use mikehaertl\pdftk\InfoFile;
3+
4+
class InfoFileTest extends \PHPUnit\Framework\TestCase
{
    /**
     * An InfoFile built from an array must exist on disk and match the
     * reference file files/InfoFileTest.txt byte for byte.
     */
    public function testInfoFileCreation()
    {
        $expectedFile = __DIR__."/files/InfoFileTest.txt";
        $metaData = array(
            'Creator' => 'php-pdftk',
            'Subject' => "öäüÖÄÜ",
        );

        // Create the temporary file next to the tests
        $infoFile = new InfoFile($metaData, null, null, __DIR__);
        $actualFile = $infoFile->getFileName();

        $this->assertFileExists($actualFile);
        $this->assertFileEquals($expectedFile, $actualFile);
    }
}

0 commit comments

Comments
 (0)