Skip to content

Commit e1866f6

Browse files
[VarDumper] fix handling of non-UTF8 strings
1 parent 6846dd2 commit e1866f6

File tree

5 files changed

+136
-41
lines changed

5 files changed

+136
-41
lines changed

Cloner/Data.php

Lines changed: 1 addition & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ private function dumpChildren($dumper, $parentCursor, &$refs, $children, $hashCu
176176
$cursor->hashCut = $hashCut;
177177
foreach ($children as $key => $child) {
178178
$cursor->hashKeyIsBinary = isset($key[0]) && !preg_match('//u', $key);
179-
$cursor->hashKey = $cursor->hashKeyIsBinary ? self::utf8Encode($key) : $key;
179+
$cursor->hashKey = $key;
180180
$this->dumpItem($dumper, $cursor, $refs, $child);
181181
if (++$cursor->hashIndex === $this->maxItemsPerDepth || $cursor->stop) {
182182
$parentCursor->stop = true;
@@ -191,33 +191,4 @@ private function dumpChildren($dumper, $parentCursor, &$refs, $children, $hashCu
191191

192192
return $hashCut;
193193
}
194-
195-
/**
 * Portable variant of utf8_encode().
 *
 * Delegates to mbstring (reading the input as CP1252) when it is available;
 * otherwise every byte is re-encoded by hand as the UTF-8 sequence of the
 * code point having the same numeric value.
 *
 * @param string $s
 *
 * @return string
 *
 * @internal
 */
public static function utf8Encode($s)
{
    if (function_exists('mb_convert_encoding')) {
        return mb_convert_encoding($s, 'UTF-8', 'CP1252');
    }

    // Work on a doubled copy: the second half is the untouched source, the
    // first half is overwritten in place with the (at most twice as long) result.
    $buffer = $s.$s;
    $total = strlen($buffer);
    $read = $total >> 1;
    $write = 0;

    while ($read < $total) {
        $byte = $buffer[$read];

        if ($byte < "\x80") {
            // ASCII bytes are copied unchanged.
            $buffer[$write] = $byte;
        } elseif ($byte < "\xC0") {
            // 0x80-0xBF: lead byte 0xC2 followed by the byte itself.
            $buffer[$write] = "\xC2";
            $buffer[++$write] = $byte;
        } else {
            // 0xC0-0xFF: lead byte 0xC3 followed by the byte shifted down by 64.
            $buffer[$write] = "\xC3";
            $buffer[++$write] = chr(ord($byte) - 64);
        }

        ++$read;
        ++$write;
    }

    return substr($buffer, 0, $write);
}
223194
}

Cloner/VarCloner.php

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,10 @@ protected function doClone($var)
9797
$stub->class = Stub::STRING_BINARY;
9898
if (0 <= $maxString && 0 < $cut = strlen($v) - $maxString) {
9999
$stub->cut = $cut;
100-
$cut = substr_replace($v, '', -$cut);
100+
$stub->value = substr($v, 0, -$cut);
101101
} else {
102-
$cut = $v;
102+
$stub->value = $v;
103103
}
104-
$stub->value = Data::utf8Encode($cut);
105104
} elseif (0 <= $maxString && isset($v[1 + ($maxString >> 2)]) && 0 < $cut = iconv_strlen($v, 'UTF-8') - $maxString) {
106105
$stub = new Stub();
107106
$stub->type = Stub::TYPE_STRING;

Dumper/AbstractDumper.php

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,16 @@ abstract class AbstractDumper implements DataDumperInterface, DumperInterface
2929
protected $decimalPoint; // This is locale dependent
3030
protected $indentPad = ' ';
3131

32+
private $charset;
33+
private $charsetConverter;
34+
3235
/**
33-
* @param callable|resource|string|null $output A line dumper callable, an opened stream or an output path, defaults to static::$defaultOutput.
36+
* @param callable|resource|string|null $output A line dumper callable, an opened stream or an output path, defaults to static::$defaultOutput.
37+
* @param string $charset The default character encoding to use for non-UTF8 strings.
3438
*/
35-
public function __construct($output = null)
39+
public function __construct($output = null, $charset = null)
3640
{
41+
$this->setCharset($charset ?: ini_get('php.output_encoding') ?: ini_get('default_charset') ?: 'UTF-8');
3742
$this->decimalPoint = (string) 0.5;
3843
$this->decimalPoint = $this->decimalPoint[1];
3944
$this->setOutput($output ?: static::$defaultOutput);
@@ -67,6 +72,43 @@ public function setOutput($output)
6772
return $prev;
6873
}
6974

75+
/**
 * Sets the default character encoding to use for non-UTF8 strings.
 *
 * UTF-8 (or null) cannot be the encoding of a non-UTF8 string, so it is
 * replaced by CP1252. The charset is then probed against mbstring first and
 * iconv second; when neither accepts it, the converter is set to "fallback"
 * and the charset to ISO-8859-1.
 *
 * @param string|null $charset The default character encoding to use for non-UTF8 strings.
 *
 * @return string|null The previous charset (null when none was set yet).
 */
public function setCharset($charset)
{
    $prev = $this->charset;
    $this->charsetConverter = 'fallback';

    // Test for null *before* upper-casing: strtoupper(null) yields '', which
    // would make the null check unreachable and select an empty charset.
    $charset = null === $charset ? 'UTF-8' : strtoupper($charset);
    if ('UTF-8' === $charset || 'UTF8' === $charset) {
        $charset = 'CP1252';
    }

    // Unknown charsets make the probes below raise PHP errors; record them
    // in $supported instead of letting them surface.
    $supported = true;
    set_error_handler(function () use (&$supported) { $supported = false; });

    $mbAliases = false;
    if (function_exists('mb_encoding_aliases')) {
        try {
            $mbAliases = mb_encoding_aliases($charset);
        } catch (\ValueError $e) {
            // PHP 8 reports an unknown encoding by throwing rather than by
            // warning. Treat it as "not supported by mbstring" so that iconv
            // still gets probed and the error handler is always restored.
            $mbAliases = false;
        }
    }

    if ($mbAliases) {
        $this->charset = $charset;
        $this->charsetConverter = 'mbstring';
    } elseif (function_exists('iconv')) {
        // Reset the flag: the mbstring probe may already have cleared it.
        $supported = true;
        iconv($charset, 'UTF-8', '');
        if ($supported) {
            $this->charset = $charset;
            $this->charsetConverter = 'iconv';
        }
    }
    if ('fallback' === $this->charsetConverter) {
        $this->charset = 'ISO-8859-1';
    }
    restore_error_handler();

    return $prev;
}
111+
70112
/**
71113
* Sets the indentation pad string.
72114
*
@@ -131,4 +173,50 @@ protected function echoLine($line, $depth, $indentPad)
131173
fwrite($this->outputStream, str_repeat($indentPad, $depth).$line."\n");
132174
}
133175
}
176+
177+
/**
 * Converts a non-UTF-8 string to UTF-8.
 *
 * The conversion uses the converter recorded in $this->charsetConverter:
 * mbstring, iconv, or a hand-written byte-by-byte re-encoding. The
 * byte-by-byte path is also used when iconv fails on the given input.
 *
 * @param string $s The non-UTF-8 string to convert.
 *
 * @return string The string converted to UTF-8.
 */
protected function utf8Encode($s)
{
    if ('' === $s) {
        // Nothing to convert. This also keeps the fallback path from calling
        // substr('', 0, 0), which returns false instead of '' on PHP < 7.
        return $s;
    }

    if ('mbstring' === $this->charsetConverter) {
        // When the input is not valid in the configured charset, read it as raw 8bit data.
        return mb_convert_encoding($s, 'UTF-8', mb_check_encoding($s, $this->charset) ? $this->charset : '8bit');
    }
    if ('iconv' === $this->charsetConverter) {
        $valid = true;
        set_error_handler(function () use (&$valid) { $valid = false; });
        $c = iconv($this->charset, 'UTF-8', $s);
        restore_error_handler();
        // iconv() signals failure by returning false; check the return value
        // as well as the error flag so false is never handed to the caller.
        if ($valid && false !== $c) {
            return $c;
        }
    }

    // Fallback: re-encode each byte as the UTF-8 sequence of the code point
    // with the same value. The input is doubled so the second half can be
    // read while the first half is overwritten in place with the result.
    $s .= $s;
    $len = strlen($s);

    for ($i = $len >> 1, $j = 0; $i < $len; ++$i, ++$j) {
        switch (true) {
            case $s[$i] < "\x80":
                // ASCII: copied unchanged.
                $s[$j] = $s[$i];
                break;

            case $s[$i] < "\xC0":
                // 0x80-0xBF: 0xC2 followed by the byte itself.
                $s[$j] = "\xC2";
                $s[++$j] = $s[$i];
                break;

            default:
                // 0xC0-0xFF: 0xC3 followed by the byte minus 64.
                $s[$j] = "\xC3";
                $s[++$j] = chr(ord($s[$i]) - 64);
                break;
        }
    }

    return substr($s, 0, $j);
}
134222
}

Dumper/CliDumper.php

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
namespace Symfony\Component\VarDumper\Dumper;
1313

14-
use Symfony\Component\VarDumper\Cloner\Data;
1514
use Symfony\Component\VarDumper\Cloner\Cursor;
1615

1716
/**
@@ -48,9 +47,9 @@ class CliDumper extends AbstractDumper
4847
/**
4948
* {@inheritdoc}
5049
*/
51-
public function __construct($output = null)
50+
public function __construct($output = null, $charset = null)
5251
{
53-
parent::__construct($output);
52+
parent::__construct($output, $charset);
5453

5554
if ('\\' === DIRECTORY_SEPARATOR && false !== @getenv('ANSICON')) {
5655
// Use only the base 16 xterm colors when using ANSICON
@@ -140,8 +139,8 @@ public function dumpScalar(Cursor $cursor, $type, $value)
140139
break;
141140

142141
default:
143-
$attr['value'] = isset($value[0]) && !preg_match('//u', $value) ? Data::utf8Encode($value) : $value;
144-
$value = isset($type[0]) && !preg_match('//u', $type) ? Data::utf8Encode($type) : $type;
142+
$attr['value'] = isset($value[0]) && !preg_match('//u', $value) ? $this->utf8Encode($value) : $value;
143+
$value = isset($type[0]) && !preg_match('//u', $type) ? $this->utf8Encode($type) : $type;
145144
break;
146145
}
147146

@@ -157,6 +156,9 @@ public function dumpString(Cursor $cursor, $str, $bin, $cut)
157156
{
158157
$this->dumpKey($cursor);
159158

159+
if ($bin) {
160+
$str = $this->utf8Encode($str);
161+
}
160162
if ('' === $str) {
161163
$this->line .= '""';
162164
$this->dumpLine($cursor->depth);
@@ -220,6 +222,9 @@ public function enterHash(Cursor $cursor, $type, $class, $hasChild)
220222
{
221223
$this->dumpKey($cursor);
222224

225+
if (!preg_match('//u', $class)) {
226+
$class = $this->utf8Encode($class);
227+
}
223228
if (Cursor::HASH_OBJECT === $type) {
224229
$prefix = 'stdClass' !== $class ? $this->style('note', $class).' {' : '{';
225230
} elseif (Cursor::HASH_RESOURCE === $type) {
@@ -279,6 +284,9 @@ protected function dumpEllipsis(Cursor $cursor, $hasChild, $cut)
279284
protected function dumpKey(Cursor $cursor)
280285
{
281286
if (null !== $key = $cursor->hashKey) {
287+
if ($cursor->hashKeyIsBinary) {
288+
$key = $this->utf8Encode($key);
289+
}
282290
$attr = array('binary' => $cursor->hashKeyIsBinary);
283291
$bin = $cursor->hashKeyIsBinary ? 'b' : '';
284292
$style = 'key';

Tests/HtmlDumperTest.php

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ public function testGet()
2424
require __DIR__.'/Fixtures/dumb-var.php';
2525

2626
$dumper = new HtmlDumper('php://output');
27-
$dumper->setColors(false);
2827
$dumper->setDumpHeader('<foo></foo>');
2928
$dumper->setDumpBoundaries('<bar>', '</bar>');
3029
$cloner = new VarCloner();
@@ -108,6 +107,36 @@ public function testGet()
108107
</samp>]
109108
</bar>
110109
110+
EOTXT
111+
,
112+
113+
$out
114+
);
115+
}
116+
117+
public function testCharset()
118+
{
119+
if (!extension_loaded('mbstring')) {
120+
$this->markTestSkipped('This test requires mbstring.');
121+
}
122+
$var = mb_convert_encoding('Словарь', 'CP1251', 'UTF-8');
123+
124+
$dumper = new HtmlDumper('php://output', 'CP1251');
125+
$dumper->setDumpHeader('<foo></foo>');
126+
$dumper->setDumpBoundaries('<bar>', '</bar>');
127+
$cloner = new VarCloner();
128+
129+
$data = $cloner->cloneVar($var);
130+
$out = fopen('php://memory', 'r+b');
131+
$dumper->dump($data, $out);
132+
rewind($out);
133+
$out = stream_get_contents($out);
134+
135+
$this->assertStringMatchesFormat(
136+
<<<EOTXT
137+
<foo></foo><bar>b"<span class=sf-dump-str title="7 binary or non-UTF-8 characters">&#1057;&#1083;&#1086;&#1074;&#1072;&#1088;&#1100;</span>"
138+
</bar>
139+
111140
EOTXT
112141
,
113142

0 commit comments

Comments
 (0)