@@ -49,6 +49,9 @@ public function __toArray()
49
49
50
50
/**
51
51
* Parse the output of dump_data into something usable.
52
+ *
53
+ * The expected string looks similar to this:
54
+ *
52
55
* InfoBegin
53
56
* InfoKey: Creator
54
57
* InfoValue: Adobe Acrobat Pro DC 15.0
@@ -62,67 +65,74 @@ public function __toArray()
62
65
* BookmarkTitle: First bookmark
63
66
* BookmarkLevel: 1
64
67
* BookmarkPageNumber: 1
68
+ * BookmarkBegin
69
+ * BookmarkTitle: Second bookmark
70
+ * BookmarkLevel: 1
71
+ * BookmarkPageNumber: 2
65
72
*
66
73
* @param $dataString
67
74
* @return array
68
75
*/
69
76
private function parseData($dataString)
{
    // Result layout:
    //  - plain "Key: Value" lines become $output['Key'] = 'Value'
    //  - every "<Group>Begin" block appends an array of its fields (with the
    //    group prefix stripped from the field names) to $output['<Group>']
    //  - the 'Info' group is finally flattened to ['<InfoKey>' => '<InfoValue>']
    $output = array();
    $group = null;        // Name of the group currently being read, e.g. "Bookmark"
    $groupData = array(); // Fields collected so far for the current block

    // Split on any common line ending. Relying on PHP_EOL would tie the
    // parser to the line ending of the platform PHP runs on, which is not
    // necessarily the one used in the data string.
    $lines = preg_split('/\r\n|\r|\n/', (string) $dataString) ?: array();

    foreach ($lines as $line) {
        $trimmedLine = trim($line);

        // Parse blocks of the form:
        // AbcBegin
        // AbcData1: Value1
        // AbcData2: Value2
        // AbcBegin
        // AbcData1: Value3
        // AbcData2: Value4
        // ...
        if (preg_match('/^(\w+)Begin$/', $trimmedLine, $matches)) {
            // Previous block ended - if any - so add it to the output
            if ($group !== null && !empty($groupData)) {
                $output[$group][] = $groupData;
            }
            // Now start the next block
            $group = $matches[1]; // Info, PageMedia, ...
            // Also replace a scalar left behind by a top level key of the
            // same name, as appending to a string would be a fatal error
            if (!isset($output[$group]) || !is_array($output[$group])) {
                $output[$group] = array();
            }
            $groupData = array();
            continue;
        }

        if ($group !== null) {
            // Check for AbcData1: Value1
            $fieldPattern = '/^' . preg_quote($group, '/') . '(\w+): ?(.*)$/';
            if (preg_match($fieldPattern, $trimmedLine, $matches)) {
                $groupData[$matches[1]] = $matches[2];
                continue;
            }
            // Something else, so the block ended. The line itself is still
            // checked for a top level "Key: Value" pair below.
            if (!empty($groupData)) {
                $output[$group][] = $groupData;
                $groupData = array();
            }
            $group = null;
        }

        // Top level "Key: Value" line. Lines without a colon are ignored.
        if (preg_match('/([^:]*): ?(.*)/', $trimmedLine, $matches)) {
            $output[$matches[1]] = $matches[2];
        }
    }

    // There could be a final block left if it was not followed by another
    // line in the loop
    if ($group !== null && !empty($groupData)) {
        $output[$group][] = $groupData;
    }

    // Info group is a list of ['Key' => 'x', 'Value' => 'y'], so
    // convert it to ['x' => 'y', ...]
    if (isset($output['Info']) && is_array($output['Info'])) {
        $data = array();
        foreach ($output['Info'] as $infoGroup) {
            if (isset($infoGroup['Key'], $infoGroup['Value'])) {
                $data[$infoGroup['Key']] = $infoGroup['Value'];
            }
        }
        $output['Info'] = $data;
    }

    return $output;
}
0 commit comments