Skip to content

Commit 80ca2e2

Browse files
Fix HTML to DOM client parser
`DOMParser` was nesting certain elements (e.g., `<div>`) inside `<html>` and `<body>`, so formatting the document's child nodes gave incorrect results. Update the logic so that the correct nodes are parsed and formatted.
1 parent 15cc84c commit 80ca2e2

File tree

1 file changed

+29
-6
lines changed

1 file changed

+29
-6
lines changed

lib/html-to-dom-client.js

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -102,22 +102,45 @@ function formatDOM(nodes, parentNode) {
102102
* @return {Object} - The DOM nodes.
103103
*/
104104
function htmlToDOMClient(html) {
105-
var root;
105+
var match = typeof html === 'string' ? html.match(/<(.+?)>/) : null;
106+
var tagName;
107+
var parentNode;
108+
var nodes;
109+
110+
if (match && typeof match[1] === 'string') {
111+
tagName = match[1].toLowerCase();
112+
}
106113

107114
// `DOMParser` can parse full HTML
108115
// https://developer.mozilla.org/en-US/docs/Web/API/DOMParser
109-
if (window.DOMParser) {
116+
if (tagName && window.DOMParser) {
110117
var parser = new window.DOMParser();
111-
root = parser.parseFromString(html, 'text/html');
118+
var doc = parser.parseFromString(html, 'text/html');
119+
120+
// <head> and <body> are siblings
121+
if (tagName === 'head' || tagName === 'body') {
122+
nodes = doc.getElementsByTagName(tagName);
123+
124+
// document's child nodes
125+
} else if (tagName === 'html') {
126+
nodes = doc.childNodes;
127+
128+
// get the element's parent's child nodes
129+
// do this in case of adjacent elements
130+
} else {
131+
parentNode = doc.getElementsByTagName(tagName)[0].parentNode;
132+
nodes = parentNode.childNodes;
133+
}
112134

113135
// otherwise, use `innerHTML`
114136
// but this will strip out tags like <html> and <body>
115137
} else {
116-
root = document.createElement('div');
117-
root.innerHTML = html;
138+
parentNode = document.createElement('div');
139+
parentNode.innerHTML = html;
140+
nodes = parentNode.childNodes;
118141
}
119142

120-
return formatDOM(root.childNodes);
143+
return formatDOM(nodes);
121144
}
122145

123146
/**

0 commit comments

Comments
 (0)