5
5
use ArrayObject ;
6
6
7
7
/**
8
- * Class DataFields
8
+ * This class is an array representation of the dump_data_fields output of
9
+ * pdftk.
9
10
*
10
11
* @author Ray Holland <raymondaholland+php-pdftk@gmail.com>
12
+ * @author Michael Härtl <haertl.mike@gmail.com>
13
+ * @license http://www.opensource.org/licenses/MIT
11
14
*/
12
15
class DataFields extends ArrayObject
13
16
{
14
17
private $ _string ;
15
-
16
18
private $ _array ;
17
19
18
20
/**
@@ -25,7 +27,7 @@ class DataFields extends ArrayObject
25
27
public function __construct($input = null, $flags = 0, $iterator_class = "ArrayIterator")
{
    // Keep the raw input string; any falsy input (null, '', ...) is stored as ''.
    $this->_string = $input ?: '';

    // Parse once and keep the array form next to the raw string.
    $this->_array = self::parse($this->_string);

    // Hand the parsed data to ArrayObject. A constructor produces no value,
    // so the parent call is not returned.
    parent::__construct($this->_array, $flags, $iterator_class);
}
@@ -47,114 +49,107 @@ public function __toArray()
47
49
}
48
50
49
51
/**
 * Parse the output of dump_data_fields into an array.
 *
 * The string to parse can either be a single block of `Xyz:value` lines
 * or a set of such blocks, separated by and starting with `---`.
 *
 * Here's an example:
 *
 * ```
 * ---
 * FieldType: Text
 * FieldName: Text1
 * FieldFlags: 0
 * FieldValue: University of Missouri : Ray-Holland
 * extended line value
 * FieldValueDefault: University of Missouri : Ray-Holland
 * extended line2 value
 * FieldJustification: Left
 * FieldMaxLength: 99
 * ---
 * FieldType: Text
 * FieldName: Text2
 * ...
 * ...
 * ```
 *
 * @param string|null $input the string to parse; null is treated as ''
 * @return array the parsed result: a list of parsed blocks if the input
 * starts with `---`, otherwise the single parsed block
 */
public static function parse($input)
{
    // Normalise null to '' so the string functions below never receive null.
    $input = (string) $input;

    // No leading '---': the whole input is one block.
    if (strncmp('---', $input, 3) !== 0) {
        return self::parseBlock($input);
    }

    // Split blocks only if '---' is followed by 'FieldType'
    // (presumably so that a '---' line inside a multi-line value does not
    // start a new block).
    $blocks = preg_split(
        '/^---(\r\n|\n|\r)(?=FieldType:)/m',
        substr($input, 3)
    );

    // Reference the callback through __CLASS__ rather than a hard-coded
    // fully qualified class name, so it survives a rename or namespace move.
    return array_map([__CLASS__, 'parseBlock'], $blocks);
}
72
94
73
- // ($trimmedLine === '' && $currentField != 'FieldValue')
74
- // Don't start new field for an empty line in a multi-line FieldValue
75
- if ($ trimmedLine === '--- ' || ($ currentField !== 'FieldValue ' && $ trimmedLine === '' )) {
76
- // Block completed; process it
77
- if (sizeof ($ field ) > 0 ) {
78
- $ output [] = $ field ;
95
/**
 * Parses a block of this form:
 *
 * ```
 * Name1: Value1
 * Name2: Value2
 * Name3: Value3
 * ...
 * ```
 *
 * A name that occurs more than once (like 'FieldStateOption') is collected
 * into a list of values. Values that span several lines (see
 * lineContinues()) are joined with "\n". Lines without a colon that are
 * not part of a multi-line value are ignored.
 *
 * @param string $block the block to parse
 * @return array the parsed block values indexed by respective names
 */
public static function parseBlock($block)
{
    $data = [];
    // Cast first so that a null block is handled like an empty string.
    $lines = preg_split("/(\r\n|\n|\r)/", trim((string) $block));
    $continueKey = null;

    foreach ($lines as $n => $line) {
        if ($continueKey !== null) {
            // This line belongs to the multi-line value started earlier.
            if (is_array($data[$continueKey])) {
                // The key was repeated and converted to a list: extend the
                // most recently added value instead of the array itself.
                $last = count($data[$continueKey]) - 1;
                $data[$continueKey][$last] .= "\n" . $line;
            } else {
                $data[$continueKey] .= "\n" . $line;
            }
            if (!self::lineContinues($lines, $n, $continueKey)) {
                $continueKey = null;
            }
            continue;
        }

        // Everything up to the first colon is the key, the rest (minus one
        // optional space) is the value, which may itself contain colons.
        if (!preg_match('/^([^:]*): ?(.*)/', $line, $match)) {
            continue;
        }
        $key = $match[1];
        $value = $match[2];

        // Convert multiple keys like 'FieldStateOption' to array
        if (isset($data[$key])) {
            $data[$key] = (array) $data[$key];
            $data[$key][] = $value;
        } else {
            $data[$key] = $value;
        }

        if (self::lineContinues($lines, $n, $key)) {
            $continueKey = $key;
        }
    }

    return $data;
}
129
136
130
137
/**
 * Checks whether the value for the given line number continues on the next
 * line. This is the case if the key is 'FieldValue' or 'FieldValueDefault',
 * a next line exists, and that next line does not start with either
 * 'FieldValueDefault:' or 'FieldJustification:'.
 *
 * @param array $lines all lines of the block
 * @param int $n the 0-based index of the current line
 * @param string $key the key for the value. Only 'FieldValue' and
 * 'FieldValueDefault' can span multiple lines
 * @return bool whether the value continues in line n + 1
 */
protected static function lineContinues($lines, $n, $key)
{
    // Only these two keys may hold multi-line values. Strict comparison
    // avoids loose matches for non-string keys.
    if (!in_array($key, ['FieldValue', 'FieldValueDefault'], true)) {
        return false;
    }

    // Nothing can continue past the last line of the block.
    $next = $n + 1;
    if (!array_key_exists($next, $lines)) {
        return false;
    }

    // The value ends where the next known key begins.
    return !preg_match('/^Field(ValueDefault|Justification):/', $lines[$next]);
}
160
155
}
0 commit comments