Skip to content

Commit 50777f4

Browse files
Merge branch '4.4' into 5.4
* 4.4:
  [PropertyAccess] Fix handling of uninitialized property of parent class
  [DomCrawler] ignore bad charsets
  [Validator] Fix minRatio and maxRatio when getting rounded
  [Console] Revert StringInput bc break from #45088
  [Form] Do not fix URL protocol for relative URLs
  [Serializer] make XmlEncoder stateless thus reentrant
2 parents 2634381 + 46e7bd2 commit 50777f4

File tree

2 files changed

+11
-14
lines changed

2 files changed

+11
-14
lines changed

Crawler.php

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -164,24 +164,17 @@ public function addContent(string $content, string $type = null)
164164
return;
165165
}
166166

167-
$charset = null;
168-
if (false !== $pos = stripos($type, 'charset=')) {
169-
$charset = substr($type, $pos + 8);
170-
if (false !== $pos = strpos($charset, ';')) {
171-
$charset = substr($charset, 0, $pos);
172-
}
173-
}
167+
$charset = preg_match('//u', $content) ? 'UTF-8' : 'ISO-8859-1';
174168

175169
// http://www.w3.org/TR/encoding/#encodings
176170
// http://www.w3.org/TR/REC-xml/#NT-EncName
177-
if (null === $charset &&
178-
preg_match('/\<meta[^\>]+charset *= *["\']?([a-zA-Z\-0-9_:.]+)/i', $content, $matches)) {
179-
$charset = $matches[1];
180-
}
171+
$content = preg_replace_callback('/(charset *= *["\']?)([a-zA-Z\-0-9_:.]+)/i', function ($m) use (&$charset) {
172+
if ('charset=' === $this->convertToHtmlEntities('charset=', $m[2])) {
173+
$charset = $m[2];
174+
}
181175

182-
if (null === $charset) {
183-
$charset = preg_match('//u', $content) ? 'UTF-8' : 'ISO-8859-1';
184-
}
176+
return $m[1].$charset;
177+
}, $content, 1);
185178

186179
if ('x' === $xmlMatches[1]) {
187180
$this->addXmlContent($content, $charset);

Tests/AbstractCrawlerTest.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,10 @@ public function testAddContent()
190190
$crawler = $this->createCrawler();
191191
$crawler->addContent($this->getDoctype().'<html><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><span>中文</span></html>');
192192
$this->assertEquals('中文', $crawler->filterXPath('//span')->text(), '->addContent() guess wrong charset');
193+
194+
$crawler = $this->createCrawler();
195+
$crawler->addContent($this->getDoctype().'<html><meta http-equiv="Content-Type" content="text/html; charset=unicode" /><div class="foo"></html></html>');
196+
$this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->addContent() ignores bad charset');
193197
}
194198

195199
/**

0 commit comments

Comments
 (0)