Skip to content

Commit d2a5d2b

Browse files
Merge branch '5.4' into 6.0
* 5.4:
  [PropertyAccess] Fix handling of uninitialized property of parent class
  [DomCrawler] ignore bad charsets
  [Validator] Fix minRatio and maxRatio when getting rounded
  [Console] Revert StringInput bc break from #45088
  [Form] Do not fix URL protocol for relative URLs
  [Serializer] make XmlEncoder stateless thus reentrant
2 parents 24d9de5 + 50777f4 commit d2a5d2b

File tree

2 files changed

+11
-14
lines changed

2 files changed

+11
-14
lines changed

Crawler.php

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -142,24 +142,17 @@ public function addContent(string $content, string $type = null)
142142
return;
143143
}
144144

145-
$charset = null;
146-
if (false !== $pos = stripos($type, 'charset=')) {
147-
$charset = substr($type, $pos + 8);
148-
if (false !== $pos = strpos($charset, ';')) {
149-
$charset = substr($charset, 0, $pos);
150-
}
151-
}
145+
$charset = preg_match('//u', $content) ? 'UTF-8' : 'ISO-8859-1';
152146

153147
// http://www.w3.org/TR/encoding/#encodings
154148
// http://www.w3.org/TR/REC-xml/#NT-EncName
155-
if (null === $charset &&
156-
preg_match('/\<meta[^\>]+charset *= *["\']?([a-zA-Z\-0-9_:.]+)/i', $content, $matches)) {
157-
$charset = $matches[1];
158-
}
149+
$content = preg_replace_callback('/(charset *= *["\']?)([a-zA-Z\-0-9_:.]+)/i', function ($m) use (&$charset) {
150+
if ('charset=' === $this->convertToHtmlEntities('charset=', $m[2])) {
151+
$charset = $m[2];
152+
}
159153

160-
if (null === $charset) {
161-
$charset = preg_match('//u', $content) ? 'UTF-8' : 'ISO-8859-1';
162-
}
154+
return $m[1].$charset;
155+
}, $content, 1);
163156

164157
if ('x' === $xmlMatches[1]) {
165158
$this->addXmlContent($content, $charset);

Tests/AbstractCrawlerTest.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,10 @@ public function testAddContent()
180180
$crawler = $this->createCrawler();
181181
$crawler->addContent($this->getDoctype().'<html><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><span>中文</span></html>');
182182
$this->assertEquals('中文', $crawler->filterXPath('//span')->text(), '->addContent() guess wrong charset');
183+
184+
$crawler = $this->createCrawler();
185+
$crawler->addContent($this->getDoctype().'<html><meta http-equiv="Content-Type" content="text/html; charset=unicode" /><div class="foo"></html></html>');
186+
$this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->addContent() ignores bad charset');
183187
}
184188

185189
/**

0 commit comments

Comments
 (0)