Skip to content

Commit ae9cfc1

Browse files
committed
Issue #76 Refactor DataFields class
1 parent 80f1aec commit ae9cfc1

File tree

3 files changed

+267
-123
lines changed

3 files changed

+267
-123
lines changed

src/DataFields.php

Lines changed: 86 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,16 @@
55
use ArrayObject;
66

77
/**
8-
* Class DataFields
8+
* This class is an array representation of the dump_data_fields output of
9+
* pdftk.
910
*
1011
* @author Ray Holland <raymondaholland+php-pdftk@gmail.com>
12+
* @author Michael Härtl <haertl.mike@gmail.com>
13+
* @license http://www.opensource.org/licenses/MIT
1114
*/
1215
class DataFields extends ArrayObject
1316
{
1417
private $_string;
15-
1618
private $_array;
1719

1820
/**
@@ -25,7 +27,7 @@ class DataFields extends ArrayObject
2527
public function __construct($input = null, $flags = 0, $iterator_class = "ArrayIterator")
2628
{
2729
$this->_string = $input ?: '';
28-
$this->_array = $this->parseData($this->_string);
30+
$this->_array = self::parse($this->_string);
2931

3032
return parent::__construct($this->_array, $flags, $iterator_class);
3133
}
@@ -47,114 +49,107 @@ public function __toArray()
4749
}
4850

4951
/**
50-
* Parse the output of dump_data_fields into something usable.
51-
* Derived from: http://stackoverflow.com/a/34864936/744228
52-
* Example input (includes '---' line):
52+
* Parse the output of dump_data_fields into an array.
53+
*
54+
* The string to parse can either be a single block of `Xyz:value` lines
55+
* or a set of such blocks, separated by and starting with `---`.
56+
*
57+
*
58+
* Here's an example:
59+
*
60+
* ```
5361
* ---
5462
* FieldType: Text
5563
* FieldName: Text1
5664
* FieldFlags: 0
5765
* FieldValue: University of Missouri : Ray-Holland
66+
* extended line value
5867
* FieldValueDefault: University of Missouri : Ray-Holland
68+
* extended line2 value
5969
* FieldJustification: Left
6070
* FieldMaxLength: 99
71+
* ---
72+
* FieldType: Text
73+
* FieldName: Text2
74+
* ...
75+
* ...
76+
* ```
6177
*
62-
* @param $dataString
63-
* @return array
78+
* @param string $input the string to parse
79+
* @return array the parsed result
6480
*/
65-
private function parseData($dataString)
81+
public static function parse($input)
6682
{
67-
$output = array();
68-
$field = array();
69-
$currentField = "";
70-
foreach (explode("\n", $dataString) as $line) {
71-
$trimmedLine = trim($line);
83+
if (strncmp('---', $input, 3) === 0) {
84+
// Split blocks only if '---' is followed by 'FieldType'
85+
$blocks = preg_split(
86+
'/^---(\r\n|\n|\r)(?=FieldType:)/m',
87+
substr($input,3 )
88+
);
89+
return array_map('\mikehaertl\pdftk\DataFields::parseBlock', $blocks);
90+
} else {
91+
return self::parseBlock($input);
92+
}
93+
}
7294

73-
// ($trimmedLine === '' && $currentField != 'FieldValue')
74-
// Don't start new field for an empty line in a multi-line FieldValue
75-
if ($trimmedLine === '---' || ($currentField !== 'FieldValue' && $trimmedLine === '')) {
76-
// Block completed; process it
77-
if (sizeof($field) > 0) {
78-
$output[] = $field;
95+
/**
96+
* Parses a block of this form:
97+
*
98+
* ```
99+
* Name1: Value1
100+
* Name2: Value2
101+
* Name3: Value3
102+
* ...
103+
* ```
104+
*
105+
* @param string $block the block to parse
106+
* @return array the parsed block values indexed by respective names
107+
*/
108+
public static function parseBlock($block)
109+
{
110+
$data = [];
111+
$lines = preg_split("/(\r\n|\n|\r)/", trim($block));
112+
$continueKey = null;
113+
foreach($lines as $n => $line) {
114+
if ($continueKey !== null) {
115+
$data[$continueKey] .= "\n" . $line;
116+
if (!self::lineContinues($lines, $n, $continueKey)) {
117+
$continueKey = null;
118+
}
119+
} elseif (preg_match('/([^:]*): ?(.*)/', $line, $match)) {
120+
$key = $match[1];
121+
$value = $match[2];
122+
// Convert multiple keys like 'FieldStateOption' to array
123+
if (isset($data[$key])) {
124+
$data[$key] = (array) $data[$key];
125+
$data[$key][] = $value;
126+
} else {
127+
$data[$key] = $value;
128+
}
129+
if (self::lineContinues($lines, $n, $key)) {
130+
$continueKey = $key;
79131
}
80-
$field = array();
81-
continue;
82-
}
83-
84-
// Process contents of data block
85-
$parts = explode(':', $line);
86-
$key = null;
87-
$value = null;
88-
89-
//Continue through lines already processed from FieldValue
90-
if($currentField === 'FieldValue'
91-
&& $parts[0] !== 'FieldJustification'
92-
&& !empty($field['FieldValue'])){
93-
94-
continue;
95-
}
96-
97-
// Handle colon in the value
98-
if (sizeof($parts) !== 2) {
99-
$key = $parts[0];
100-
unset($parts[0]);
101-
$value = implode(':', $parts);
102-
}
103-
104-
$key = $key ?: trim($parts[0]);
105-
$value = $value ?: trim($parts[1]);
106-
107-
if ($currentField === 'FieldValue' && !empty($value)) {
108-
$value = $this->getFieldValue($line,$dataString);
109-
} else if ($currentField === 'FieldValue'){
110-
$value = "";
111-
}
112-
113-
if (isset($field[$key])) {
114-
$field[$key] = (array) $field[$key];
115-
$field[$key][] = $value;
116-
}
117-
else {
118-
$field[$key] = $value;
119132
}
120133
}
121-
122-
// process final block
123-
if (sizeof($field) > 0) {
124-
$output[] = $field;
125-
}
126-
127-
return $output;
134+
return $data;
128135
}
129136

130137
/**
131-
* Parses a FieldValue for Multiple Lines e.g.
132-
* FieldValue: Text
133-
*
134-
* MoreText
135-
* Something
136-
* ExtraText
137-
* OtherText
138+
* Checks whether the value for the given line number continues on the next
139+
* line. This is the case if the next line does not start with either
140+
* 'FieldValueDefault:' or 'FieldJustification:'.
138141
*
139-
* FieldJustification: Left
140-
*
141-
* @param string $line The current line being searched
142-
* @param string $dataString
143-
* @return bool|string Returns a string containing the value for FieldValue e.g. Text\n\nMoreText\nSomething etc.
142+
* @param array $lines all lines of the block
143+
* @param int $n the 0-based index of the current line
144+
* @param string $key the key for the value. Only 'FieldValue' and
145+
* 'FieldValueDefault' can span multiple lines
146+
* @return bool whether the value continues in line n + 1
144147
*/
145-
private function getFieldValue($line, $dataString)
148+
protected static function lineContinues($lines, $n, $key)
146149
{
147-
// Offset 'FieldValue:'
148-
$pos1 = strpos($dataString, $line) + 11;
149-
$pos2 = strpos($dataString, "FieldJustification", $pos1);
150-
$length = $pos2 - $pos1;
151-
152-
$value = substr(
153-
$dataString,
154-
$pos1,
155-
$length
156-
);
157-
158-
return $value;
150+
return
151+
in_array($key, ['FieldValue', 'FieldValueDefault']) &&
152+
array_key_exists($n + 1, $lines) &&
153+
!preg_match('/^Field(ValueDefault|Justification):/', $lines[$n + 1]);
159154
}
160155
}

tests/DataFieldsTest.php

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
<?php
2+
use mikehaertl\pdftk\DataFields;
3+
4+
class DataFieldsTest extends \PHPUnit\Framework\TestCase
5+
{
6+
public function testDataFieldParsing()
7+
{
8+
$dataFields = new DataFields($this->_testInput);
9+
//print_r($dataFields->__toArray());exit;
10+
$this->assertEquals($this->_parsedResult, $dataFields->__toArray());
11+
}
12+
13+
protected $_testInput = <<<DATA
14+
---
15+
FieldType: Text
16+
FieldName: field1
17+
FieldNameAlt: field1_alt
18+
FieldFlags: 0
19+
FieldJustification: Left
20+
---
21+
FieldType: Text
22+
FieldName: field2
23+
FieldNameAlt: field2_alt
24+
FieldFlags: 0
25+
FieldValue: value:with:colons
26+
FieldJustification: Left
27+
---
28+
FieldType: Text
29+
FieldName: field3
30+
FieldNameAlt: field3_alt
31+
FieldFlags: 0
32+
FieldValue:
33+
FieldJustification: Left
34+
---
35+
FieldType: Text
36+
FieldName: field4
37+
FieldNameAlt: field4_alt
38+
FieldFlags: 0
39+
FieldValue: field:with:colons
40+
41+
---more:colons:
42+
and
43+
multi lines
44+
45+
FieldJustification: Left
46+
---
47+
FieldType: Text
48+
FieldName: field5
49+
FieldNameAlt: field5_alt
50+
FieldFlags: 0
51+
FieldValue: field:with:colons
52+
53+
---more:colons:
54+
and
55+
multi lines
56+
57+
FieldValueDefault: default:with:colons
58+
59+
---more:colons:
60+
and
61+
multi lines
62+
63+
FieldJustification: Left
64+
DATA;
65+
66+
protected $_parsedResult = [
67+
[
68+
'FieldType' => 'Text',
69+
'FieldName' => 'field1',
70+
'FieldNameAlt' => 'field1_alt',
71+
'FieldFlags' => 0,
72+
'FieldJustification' => 'Left',
73+
],
74+
[
75+
'FieldType' => 'Text',
76+
'FieldName' => 'field2',
77+
'FieldNameAlt' => 'field2_alt',
78+
'FieldFlags' => 0,
79+
'FieldValue' => 'value:with:colons',
80+
'FieldJustification' => 'Left',
81+
],
82+
[
83+
'FieldType' => 'Text',
84+
'FieldName' => 'field3',
85+
'FieldNameAlt' => 'field3_alt',
86+
'FieldFlags' => 0,
87+
'FieldValue' => '',
88+
'FieldJustification' => 'Left',
89+
],
90+
[
91+
'FieldType' => 'Text',
92+
'FieldName' => 'field4',
93+
'FieldNameAlt' => 'field4_alt',
94+
'FieldFlags' => 0,
95+
'FieldValue' => "field:with:colons\n\n---more:colons:\nand\nmulti lines\n",
96+
'FieldJustification' => 'Left',
97+
],
98+
[
99+
'FieldType' => 'Text',
100+
'FieldName' => 'field5',
101+
'FieldNameAlt' => 'field5_alt',
102+
'FieldFlags' => 0,
103+
'FieldValue' => "field:with:colons\n\n---more:colons:\nand\nmulti lines\n",
104+
'FieldValueDefault' => "default:with:colons\n\n---more:colons:\nand\nmulti lines\n",
105+
'FieldJustification' => 'Left',
106+
]
107+
];
108+
}

0 commit comments

Comments
 (0)