Skip to content

Commit 36d38dc

Browse files
Fix HTML parsing to handle unclosed img tags and ensure valid XML conversion
1 parent 3d0f31f commit 36d38dc

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

src/PhpWord/Shared/Html.php

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -89,11 +89,21 @@ public static function addHtml($element, $html, $fullHTML = false, $preserveWhit
8989
if (\PHP_VERSION_ID < 80000) {
9090
$orignalLibEntityLoader = libxml_disable_entity_loader(true);
9191
}
92-
$dom = new DOMDocument();
93-
$dom->preserveWhiteSpace = $preserveWhiteSpace;
94-
$dom->loadHTML($html);
95-
static::$xpath = new DOMXPath($dom);
96-
$node = $dom->getElementsByTagName('body');
92+
93+
$htmlDom = new DOMDocument();
94+
$htmlDom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
95+
96+
$validXml = $htmlDom->saveXML($htmlDom->documentElement);
97+
if (false === $validXml) {
98+
throw new Exception('Could not convert HTML to valid XML');
99+
}
100+
101+
$xmlDom = new DOMDocument();
102+
$xmlDom->preserveWhiteSpace = $preserveWhiteSpace;
103+
$xmlDom->loadXML($validXml);
104+
105+
static::$xpath = new DOMXPath($xmlDom);
106+
$node = $xmlDom->getElementsByTagName('body');
97107

98108
static::parseNode($node->item(0), $element);
99109
if (\PHP_VERSION_ID < 80000) {

0 commit comments

Comments
 (0)