@@ -13,6 +13,8 @@ class StringEncoder implements Encoder
13
13
/**
 * Default values for the options recognized by this encoder.
 *
 * @var array
 */
private static $defaultOptions = [
    // Use a double-quoted literal with escapes when the string has bytes outside printable ASCII
    'string.escape' => true,
    // Emit strings that are not valid UTF-8 as a base64_decode() call
    'string.binary' => false,
    // Write multibyte UTF-8 sequences as \u{...} escapes inside double-quoted literals
    'string.utf8' => false,
];
17
19
18
20
public function getDefaultOptions ()
@@ -27,11 +29,44 @@ public function supports($value)
27
29
28
30
public function encode($value, $depth, array $options, callable $encode)
{
    // Pick the literal form for the value: single-quoted, double-quoted with
    // escapes, or a base64_decode() call, depending on its bytes and the options.
    $string = (string) $value;

    // Nothing outside printable ASCII was found, so a single-quoted literal is enough.
    if (!preg_match('/[^\x20-\x7E]/', $string)) {
        return $this->getSingleQuotedString($string);
    }

    // The binary check takes precedence over escaping.
    if ($this->isBinaryString($string, $options)) {
        return $this->encodeBinaryString($string);
    }

    return $options['string.escape']
        ? $this->getDoubleQuotedString($string, $options)
        : $this->getSingleQuotedString($string);
}
44
+
45
/**
 * Tells whether the string should be treated as binary data.
 *
 * A string counts as binary only when the 'string.binary' option is enabled
 * and its bytes do not form well-formed UTF-8.
 *
 * @param string $string The string to test
 * @param array $options The encoder options; the 'string.binary' key is read
 * @return bool True if the option is enabled and the string is not valid UTF-8, false otherwise
 */
private function isBinaryString($string, $options)
{
    if (!$options['string.binary']) {
        return false;
    }

    // With the "u" modifier PCRE validates the whole subject as UTF-8 before
    // matching, so an empty pattern yields 1 for well-formed input and false
    // for malformed input. This needs no mbstring extension. A hand-written
    // alternation repeated once per character can exhaust the PCRE backtrack
    // or JIT stack limits on long strings, and that failure would then be
    // misread as "not UTF-8".
    return preg_match('//u', $string) !== 1;
}
66
+
67
/**
 * Returns PHP code that recreates the given bytes through base64_decode().
 *
 * @param string $string The raw bytes to encode
 * @return string A base64_decode() call with the base64 payload as a single-quoted argument
 */
private function encodeBinaryString($string)
{
    $payload = base64_encode($string);

    return "base64_decode('" . $payload . "')";
}
36
71
37
72
/**
@@ -49,21 +84,62 @@ private function getSingleQuotedString($string)
49
84
* @param string $string String to wrap and escape
50
85
* @return string The string wrapped in double quotes and escape correctly
51
86
*/
52
private function getDoubleQuotedString($string, $options)
{
    // $options is the encoder option array; only the 'string.utf8' key is read here.

    // Characters that have a dedicated escape inside a double-quoted literal.
    $escapes = [
        "\n" => '\n',
        "\r" => '\r',
        "\t" => '\t',
        '$' => '\$',
        '"' => '\"',
        '\\' => '\\\\',
    ];
    $escaped = strtr($string, $escapes);

    // Optionally turn multibyte sequences into \u{...} before the byte-wise pass below.
    if ($options['string.utf8']) {
        $escaped = $this->encodeUtf8($escaped);
    }

    // Every byte still outside printable ASCII becomes a two-digit \x escape.
    $toHexEscape = function ($matches) {
        return sprintf('\x%02x', ord($matches[0]));
    };

    return '"' . preg_replace_callback('/[^\x20-\x7E]/', $toHexEscape, $escaped) . '"';
}
110
+
111
/**
 * Replaces multibyte UTF-8 sequences in the string with \u{...} escapes.
 *
 * Bytes that are not part of a matched two to four byte sequence are left untouched.
 *
 * @param string $string The string to process
 * @return string The string with matched sequences replaced by their hexadecimal code point escapes
 */
private function encodeUtf8($string)
{
    // Alternatives for two, three and four byte sequences (extended syntax,
    // so the whitespace inside the pattern is ignored).
    $multibyte =
        '/ [\xC2-\xDF][\x80-\xBF]
         | \xE0[\xA0-\xBF][\x80-\xBF]
         | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}
         | \xED[\x80-\x9F][\x80-\xBF]
         | \xF0[\x90-\xBF][\x80-\xBF]{2}
         | [\xF1-\xF3][\x80-\xBF]{3}
         | \xF4[\x80-\x8F][\x80-\xBF]{2}/x';

    $toEscape = function ($match) {
        $codePoint = $this->getCodePoint($match[0]);

        return '\u{' . dechex($codePoint) . '}';
    };

    return preg_replace_callback($multibyte, $toEscape, $string);
}
126
+
127
/**
 * Computes the code point from the bytes of one multibyte UTF-8 sequence.
 *
 * Lengths two and three are handled explicitly; any other length is decoded
 * with the four byte layout.
 *
 * @param string $bytes The bytes of a single multibyte sequence
 * @return int The code point assembled from the payload bits of each byte
 */
private function getCodePoint($bytes)
{
    // Continuation bytes carry six payload bits each.
    $payload = 0x3F;

    switch (strlen($bytes)) {
        case 2:
            // The lead byte carries five payload bits.
            return ((ord($bytes[0]) & 0x1F) << 6)
                | (ord($bytes[1]) & $payload);

        case 3:
            // The lead byte carries four payload bits.
            return ((ord($bytes[0]) & 0x0F) << 12)
                | ((ord($bytes[1]) & $payload) << 6)
                | (ord($bytes[2]) & $payload);

        default:
            // The lead byte carries three payload bits.
            return ((ord($bytes[0]) & 0x07) << 18)
                | ((ord($bytes[1]) & $payload) << 12)
                | ((ord($bytes[2]) & $payload) << 6)
                | (ord($bytes[3]) & $payload);
    }
}
69
145
}
0 commit comments