-
-
Notifications
You must be signed in to change notification settings - Fork 394
Open
Description
"htmlparser2": "^10.0.0",
Minimal repro:
const { Parser, DomHandler } = require('htmlparser2');
const render = require('dom-serializer').default;
// Repro input: a self-closing, non-void tag immediately followed by text.
const input = '<span />content';

// Feed the whole input to htmlparser2 in one call; the handler collects the DOM.
const handler = new DomHandler();
new Parser(handler).end(input);

// Serialize the handler's root node back to HTML.
const output = render(handler.root);

// Print input and output for side-by-side comparison.
console.log('Input HTML: ', input);
console.log('Output HTML: ', output);
Prints:
Input HTML: <span />content
Output HTML: <span>content</span>
I'd expect the output HTML to look either like the input or, at least, like `<span></span>content` (as `span` is not `isVoidElement`).
Here's a more complex example with `pre` that prompted me to create this issue. The `start` and `end` indices are messed up:
const { Parser, DomHandler } = require('htmlparser2');
const render = require('dom-serializer').default;
// Repro input: a self-closing tag followed by content, nested inside <pre>.
const input = '<pre><span />code with</pre> test';

// Enable both index options on the handler, then parse the input in one call.
const indexOptions = { withStartIndices: true, withEndIndices: true };
const handler = new DomHandler(indexOptions);
new Parser(handler).end(input);

// Take the parsed DOM root and serialize it back to HTML.
const dom = handler.root;
const output = render(dom);

// Print input and output for side-by-side comparison.
console.log('Input HTML: ', input);
console.log('Output HTML: ', output);
/**
 * Recursively log the start and end indices of a node and all of its descendants.
 *
 * Each logged line contains the node type, its `startIndex` / `endIndex`, and
 * the slice of the enclosing script's `input` string covered by those indices
 * (`endIndex` is treated as inclusive).
 *
 * @param {object} node - Node exposing `type`, `startIndex`, `endIndex` and, optionally, `children`.
 * @param {number} [level=0] - Nesting depth; controls the indentation of the logged line.
 */
function displayIndices(node, level = 0) {
  const indent = ' '.repeat(level * 2);
  // Loose `!= null` rejects both undefined and null. Nodes parsed without index
  // tracking presumably carry null (domhandler default — verify); slicing with
  // null would coerce to 0 and wrongly print the first character of `input`.
  const hasIndices = node.startIndex != null && node.endIndex != null;
  const originalContent = hasIndices ? input.slice(node.startIndex, node.endIndex + 1) : '';
  console.log(`${indent}Node: ${node.type}, Start: ${node.startIndex}, End: ${node.endIndex} - Content: "${originalContent}"`);
  if (node.children) {
    for (const child of node.children) {
      displayIndices(child, level + 1);
    }
  }
}
// Print a header, then dump index information for each top-level node of the parsed DOM.
console.log('Node indices:');
for (const child of dom.children) {
  displayIndices(child);
}
Result:
Input HTML: <pre><span />code with</pre> test
Output HTML: <pre><span>code with</span></pre> test
Node indices:
Node: tag, Start: 0, End: 27 - Content: "<pre><span />code with</pre>"
Node: tag, Start: 5, End: 27 - Content: "<span />code with</pre>"
Node: text, Start: 13, End: 21 - Content: "code with"
Node: text, Start: 28, End: 32 - Content: " test"
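Note the `span` node: `Node: tag, Start: 5, End: 27 - Content: "<span />code with</pre>"` — its end index runs through the closing `</pre>` instead of stopping at the end of `<span />`.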
augusto-chaching and srenoufd
Metadata
Metadata
Assignees
Labels
No labels