Skip to content

Commit 4329df4

Browse files
authored
Merge pull request #171 from mikehaertl/refactor-info-fields
Refactor InfoFields parsing
2 parents fd966f3 + 448386f commit 4329df4

File tree

4 files changed

+101
-52
lines changed

4 files changed

+101
-52
lines changed

src/InfoFields.php

Lines changed: 55 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ public function __toArray()
4949

5050
/**
5151
* Parse the output of dump_data into something usable.
52+
*
53+
* The expected string looks similar to this:
54+
*
5255
* InfoBegin
5356
* InfoKey: Creator
5457
* InfoValue: Adobe Acrobat Pro DC 15.0
@@ -62,67 +65,74 @@ public function __toArray()
6265
* BookmarkTitle: First bookmark
6366
* BookmarkLevel: 1
6467
* BookmarkPageNumber: 1
68+
* BookmarkBegin
69+
* BookmarkTitle: Second bookmark
70+
* BookmarkLevel: 1
71+
* BookmarkPageNumber: 2
6572
*
6673
* @param $dataString
6774
* @return array
6875
*/
6976
private function parseData($dataString)
7077
{
71-
$expectType = null;
72-
$output = array('Info' => array(),'Bookmark' => array(),'PageMedia' => array());
73-
$field = array();
74-
$buffer = array();
78+
$output = array();
7579
foreach (explode(PHP_EOL, $dataString) as $line) {
7680
$trimmedLine = trim($line);
77-
if ($trimmedLine === 'InfoBegin') {
78-
$expectType = 'Info';
79-
continue;
80-
}
81-
if ($trimmedLine === 'BookmarkBegin') {
82-
$expectType = 'Bookmark';
83-
continue;
84-
}
85-
if ($trimmedLine === 'PageMediaBegin') {
86-
$expectType = 'PageMedia';
87-
continue;
88-
}
89-
90-
preg_match('/([^:]*): ?(.*)/', $trimmedLine, $match);
91-
$key = $match[1];
92-
$value = $match[2];
93-
94-
if ($expectType === 'Info') {
95-
if ($key === 'InfoKey') {
96-
$buffer['Key'] = $value;
97-
} elseif ($key === 'InfoValue') {
98-
$buffer['Value'] = $value;
81+
// Parse blocks of the form:
82+
// AbcBegin
83+
// AbcData1: Value1
84+
// AbcData2: Value2
85+
// AbcBegin
86+
// AbcData1: Value3
87+
// AbcData2: Value4
88+
// ...
89+
if (preg_match('/^(\w+)Begin$/', $trimmedLine, $matches)) {
90+
// Previous group ended - if any - so add it to output
91+
if (!empty($group) && !empty($groupData)) {
92+
$output[$group][] = $groupData;
9993
}
100-
if (isset($buffer['Value'], $buffer['Key'])) {
101-
$output['Info'][$buffer['Key']] = $buffer['Value'];
102-
$buffer = array();
103-
$expectType = null;
94+
// Now start next group
95+
$group = $matches[1]; // Info, PageMedia, ...
96+
if (!isset($output[$group])) {
97+
$output[$group] = array();
10498
}
99+
$groupData = array();
105100
continue;
106101
}
107-
if ($expectType !== null) {
108-
if (strpos($key, $expectType) === 0) {
109-
$buffer[str_replace($expectType, '', $key)] = $value;
102+
if (!empty($group)) {
103+
// Check for AbcData1: Value1
104+
if (preg_match("/^$group(\w+): ?(.*)$/", $trimmedLine, $matches)) {
105+
$groupData[$matches[1]] = $matches[2];
106+
continue;
110107
} else {
111-
throw new \Exception("Unexpected input");
108+
// Something else, so group ended
109+
if (!empty($groupData)) {
110+
$output[$group][] = $groupData;
111+
$groupData = array();
112+
}
113+
$group = null;
112114
}
113-
if ($expectType === 'Bookmark' && isset($buffer['Level'], $buffer['Title'], $buffer['PageNumber'])) {
114-
$output[$expectType][] = $buffer;
115-
$buffer = array();
116-
$expectType = null;
117-
} elseif ($expectType === 'PageMedia' && isset($buffer['Number'], $buffer['Rotation'], $buffer['Rect'], $buffer['Dimensions'])) {
118-
$output[$expectType][] = $buffer;
119-
$buffer = array();
120-
$expectType = null;
115+
}
116+
if (preg_match('/([^:]*): ?(.*)/', $trimmedLine, $matches)) {
117+
$output[$matches[1]] = $matches[2];
118+
}
119+
}
120+
// There could be a final group left if it was not followed by another
121+
// line in the loop
122+
if (!empty($group) && !empty($groupData)) {
123+
$output[$group][] = $groupData;
124+
}
125+
126+
// Info group is a list of ['Key' => 'x', 'Value' => 'y'], so
127+
// convert it to ['x' => 'y', ...]
128+
if (isset($output['Info'])) {
129+
$data = array();
130+
foreach ($output['Info'] as $infoGroup) {
131+
if (isset($infoGroup['Key'], $infoGroup['Value'])) {
132+
$data[$infoGroup['Key']] = $infoGroup['Value'];
121133
}
122-
continue;
123-
} else {
124-
$output[$key] = $value;
125134
}
135+
$output['Info'] = $data;
126136
}
127137
return $output;
128138
}

src/InfoFile.php

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ public function __construct($data, $suffix = null, $prefix = null, $directory =
5050
$value = defined('ENT_XML1') ? htmlspecialchars($key, ENT_XML1, 'UTF-8') : htmlspecialchars($key);
5151
$key = defined('ENT_XML1') ? htmlspecialchars($value, ENT_XML1, 'UTF-8') : htmlspecialchars($value);
5252
}
53-
5453
$fields .= "InfoBegin\nInfoKey: $key\nInfoValue: $value\n";
5554
}
5655

tests/InfoFieldsTest.php

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,27 @@ public function testInfoFieldParsing()
4747
PageMediaRotation: 0
4848
PageMediaRect: 0 0 595 842
4949
PageMediaDimensions: 595 842
50+
PageLabelBegin
51+
PageLabelNewIndex: 1
52+
PageLabelStart: 1
53+
PageLabelPrefix: some name 1
54+
PageLabelNumStyle: NoNumber
55+
PageLabelBegin
56+
PageLabelNewIndex: 2
57+
PageLabelStart: 1
58+
PageLabelPrefix: some name 2
59+
PageLabelNumStyle: DecimalArabicNumerals
60+
PageLabelBegin
61+
PageLabelNewIndex: 5
62+
PageLabelStart: 1
63+
PageLabelNumStyle: LowercaseRomanNumerals
64+
PageLabelBegin
65+
PageLabelNewIndex: 6
66+
PageLabelStart: 1
67+
PageLabelPrefix: some name 3
68+
PageLabelNumStyle: NoNumber
5069
EOD;
51-
70+
5271
protected $_parsedResult = array(
5372
"Info" => array(
5473
"CreationDate" => "D:20140709121536+02'00'",
@@ -58,7 +77,6 @@ public function testInfoFieldParsing()
5877
"PdfID0" => "8b93f76a0b28b720d0dee9a6eb2a780a",
5978
"PdfID1" => "8b93f76a0b28b720d0dee9a6eb2a780a",
6079
"NumberOfPages" => "5",
61-
"Bookmark" => array(),
6280
"PageMedia" => array(
6381
array(
6482
"Number" => "1",
@@ -90,7 +108,31 @@ public function testInfoFieldParsing()
90108
"Rect" => "0 0 595 842",
91109
"Dimensions" => "595 842"
92110
),
93-
)
94-
111+
),
112+
"PageLabel" => array(
113+
array(
114+
'NewIndex' => '1',
115+
'Start' => '1',
116+
'Prefix' => 'some name 1',
117+
'NumStyle' => 'NoNumber',
118+
),
119+
array(
120+
'NewIndex' => '2',
121+
'Start' => '1',
122+
'Prefix' => 'some name 2',
123+
'NumStyle' => 'DecimalArabicNumerals',
124+
),
125+
array(
126+
'NewIndex' => '5',
127+
'Start' => '1',
128+
'NumStyle' => 'LowercaseRomanNumerals',
129+
),
130+
array(
131+
'NewIndex' => '6',
132+
'Start' => '1',
133+
'Prefix' => 'some name 3',
134+
'NumStyle' => 'NoNumber',
135+
),
136+
),
95137
);
96138
}

tests/PdfTest.php

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,6 @@ public function testSet40BitEncryption()
550550
public function testCanGetData()
551551
{
552552
$document = $this->getDocument1();
553-
554553
$pdf = new Pdf($document);
555554
$data = $pdf->getData();
556555
$this->assertInstanceOf('\mikehaertl\pdftk\InfoFields', $data);
@@ -664,7 +663,6 @@ protected function getOutFile()
664663
"PdfID0" => "8b93f76a0b28b720d0dee9a6eb2a780a",
665664
"PdfID1" => "8b93f76a0b28b720d0dee9a6eb2a780a",
666665
"NumberOfPages" => "5",
667-
"Bookmark" => array(),
668666
"PageMedia" => array(
669667
array(
670668
"Number" => "1",

0 commit comments

Comments
 (0)