Skip to content

Commit b7b5dd2

Browse files
feat: remove htmlparser2 favor parse5 (#340)
BREAKING CHANGE: migrate from `htmlparser2` to `parse5`
1 parent 348e4f5 commit b7b5dd2

File tree

9 files changed

+1697
-2130
lines changed

9 files changed

+1697
-2130
lines changed

package-lock.json

Lines changed: 1274 additions & 1698 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@
4444
},
4545
"dependencies": {
4646
"html-minifier-terser": "^5.1.1",
47-
"htmlparser2": "^4.1.0",
4847
"loader-utils": "^2.0.0",
48+
"parse5-sax-parser": "^6.0.1",
4949
"schema-utils": "^3.0.0"
5050
},
5151
"devDependencies": {
@@ -75,6 +75,7 @@
7575
"posthtml-webp": "^2.1.0",
7676
"prettier": "^2.1.2",
7777
"standard-version": "^9.0.0",
78+
"unescape-unicode": "^0.2.0",
7879
"url-loader": "^4.1.1",
7980
"webpack": "^5.5.0"
8081
},

src/index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ export default async function loader(content) {
4949
plugins.push(minimizerPlugin({ minimize: options.minimize, errors }));
5050
}
5151

52-
const { html } = pluginRunner(plugins).process(content);
52+
const { html } = await pluginRunner(plugins).process(content);
5353

5454
for (const error of errors) {
5555
this.emitError(error instanceof Error ? error : new Error(error));

src/plugins/source-plugin.js

Lines changed: 130 additions & 169 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { Parser } from 'htmlparser2';
1+
import SAXParser from 'parse5-sax-parser';
22
import { isUrlRequest } from 'loader-utils';
33

44
import HtmlSourceError from '../HtmlSourceError';
@@ -9,6 +9,7 @@ import {
99
normalizeUrl,
1010
requestify,
1111
isUrlRequestable,
12+
c0ControlCodesExclude,
1213
} from '../utils';
1314

1415
export default (options) =>
@@ -36,186 +37,145 @@ export default (options) =>
3637
return false;
3738
}
3839

40+
const adaptedAttributes = attributes.reduce((accumulator, item) => {
41+
// eslint-disable-next-line no-param-reassign
42+
accumulator[item.name] = item.value;
43+
return accumulator;
44+
}, {});
45+
3946
return element.filter
40-
? element.filter(tag, attribute, attributes, resourcePath)
47+
? element.filter(tag, attribute, adaptedAttributes, resourcePath)
4148
: true;
4249
});
4350
};
51+
4452
const { resourcePath } = options;
45-
const parser = new Parser(
46-
{
47-
attributesMeta: {},
48-
onattribute(name, value) {
49-
// eslint-disable-next-line no-underscore-dangle
50-
const endIndex = parser._tokenizer._index;
51-
const startIndex = endIndex - value.length;
52-
const unquoted = html[endIndex] !== '"' && html[endIndex] !== "'";
53-
54-
this.attributesMeta[name] = { startIndex, unquoted };
55-
},
56-
onopentag(tag, attributes) {
57-
Object.keys(attributes).forEach((attribute) => {
58-
const value = attributes[attribute];
59-
const {
60-
startIndex: valueStartIndex,
61-
unquoted,
62-
} = this.attributesMeta[attribute];
53+
const parser5 = new SAXParser({ sourceCodeLocationInfo: true });
54+
55+
parser5.on('startTag', (node) => {
56+
const { tagName, attrs, sourceCodeLocation } = node;
57+
58+
attrs.forEach((attribute) => {
59+
const { value, prefix } = attribute;
60+
let { name } = attribute;
61+
62+
name = prefix ? `${prefix}:${name}` : name;
63+
64+
if (!sourceCodeLocation.attrs[name]) {
65+
return;
66+
}
67+
68+
const foundAttribute = getAttribute(tagName, name, attrs, resourcePath);
6369

64-
const foundAttribute = getAttribute(
65-
tag,
66-
attribute,
67-
attributes,
68-
resourcePath
69-
);
70+
if (!foundAttribute) {
71+
return;
72+
}
73+
74+
const { type } = foundAttribute;
75+
76+
const target = html.slice(
77+
sourceCodeLocation.attrs[name].startOffset,
78+
sourceCodeLocation.attrs[name].endOffset
79+
);
80+
81+
const unquoted =
82+
target[target.length - 1] !== '"' &&
83+
target[target.length - 1] !== "'";
84+
85+
// eslint-disable-next-line default-case
86+
switch (type) {
87+
case 'src': {
88+
let source;
89+
90+
try {
91+
source = parseSrc(value);
92+
} catch (error) {
93+
options.errors.push(
94+
new HtmlSourceError(
95+
`Bad value for attribute "${attribute.name}" on element "${tagName}": ${error.message}`,
96+
sourceCodeLocation.attrs[name].startOffset,
97+
sourceCodeLocation.attrs[name].endOffset,
98+
html
99+
)
100+
);
70101

71-
if (!foundAttribute) {
72102
return;
73103
}
74104

75-
const { type } = foundAttribute;
76-
77-
// eslint-disable-next-line default-case
78-
switch (type) {
79-
case 'src': {
80-
let source;
81-
82-
try {
83-
source = parseSrc(value);
84-
} catch (error) {
85-
options.errors.push(
86-
new HtmlSourceError(
87-
`Bad value for attribute "${attribute}" on element "${tag}": ${error.message}`,
88-
parser.startIndex,
89-
parser.endIndex,
90-
html
91-
)
92-
);
93-
94-
return;
95-
}
96-
97-
if (!isUrlRequestable(source.value, root)) {
98-
return;
99-
}
100-
101-
const startIndex = valueStartIndex + source.startIndex;
102-
const endIndex = startIndex + source.value.length;
103-
104-
sources.push({
105-
name: attribute,
106-
value: source.value,
107-
unquoted,
108-
startIndex,
109-
endIndex,
110-
});
111-
112-
break;
113-
}
114-
case 'srcset': {
115-
let sourceSet;
116-
117-
try {
118-
sourceSet = parseSrcset(value);
119-
} catch (error) {
120-
options.errors.push(
121-
new HtmlSourceError(
122-
`Bad value for attribute "${attribute}" on element "${tag}": ${error.message}`,
123-
parser.startIndex,
124-
parser.endIndex,
125-
html
126-
)
127-
);
128-
129-
return;
130-
}
131-
132-
sourceSet.forEach((sourceItem) => {
133-
const { source } = sourceItem;
134-
const startIndex = valueStartIndex + source.startIndex;
135-
const endIndex = startIndex + source.value.length;
136-
137-
if (!isUrlRequestable(source.value, root)) {
138-
return;
139-
}
140-
141-
sources.push({
142-
name: attribute,
143-
value: source.value,
144-
unquoted,
145-
startIndex,
146-
endIndex,
147-
});
148-
});
149-
150-
break;
151-
}
152-
// Need improve
153-
// case 'include': {
154-
// let source;
155-
//
156-
// // eslint-disable-next-line no-underscore-dangle
157-
// if (parser._tokenizer._state === 4) {
158-
// return;
159-
// }
160-
//
161-
// try {
162-
// source = parseSrc(value);
163-
// } catch (error) {
164-
// options.errors.push(
165-
// new HtmlSourceError(
166-
// `Bad value for attribute "${attribute}" on element "${tag}": ${error.message}`,
167-
// parser.startIndex,
168-
// parser.endIndex,
169-
// html
170-
// )
171-
// );
172-
//
173-
// return;
174-
// }
175-
//
176-
// if (!urlFilter(attribute, source.value, resourcePath)) {
177-
// return;
178-
// }
179-
//
180-
// const { startIndex } = parser;
181-
// const closingTag = html
182-
// .slice(startIndex - 1)
183-
// .match(
184-
// new RegExp(`<s*${tag}[^>]*>(?:.*?)</${tag}[^<>]*>`, 's')
185-
// );
186-
//
187-
// if (!closingTag) {
188-
// return;
189-
// }
190-
//
191-
// const endIndex = startIndex + closingTag[0].length;
192-
// const importItem = getImportItem(source.value);
193-
// const replacementItem = getReplacementItem(importItem);
194-
//
195-
// sources.push({ replacementItem, startIndex, endIndex });
196-
//
197-
// break;
198-
// }
105+
source = c0ControlCodesExclude(source);
106+
107+
if (!isUrlRequestable(source.value, root)) {
108+
return;
199109
}
200-
});
201-
202-
this.attributesMeta = {};
203-
},
204-
onerror(error) {
205-
options.errors.push(error);
206-
},
207-
},
208-
{
209-
decodeEntities: false,
210-
lowerCaseTags: false,
211-
lowerCaseAttributeNames: false,
212-
recognizeCDATA: true,
213-
recognizeSelfClosing: true,
214-
}
215-
);
216110

217-
parser.write(html);
218-
parser.end();
111+
const startOffset =
112+
sourceCodeLocation.attrs[name].startOffset +
113+
target.indexOf(source.value, name.length);
114+
115+
sources.push({
116+
name,
117+
value: source.value,
118+
unquoted,
119+
startIndex: startOffset,
120+
endIndex: startOffset + source.value.length,
121+
});
122+
123+
break;
124+
}
125+
126+
case 'srcset': {
127+
let sourceSet;
128+
129+
try {
130+
sourceSet = parseSrcset(value);
131+
} catch (error) {
132+
options.errors.push(
133+
new HtmlSourceError(
134+
`Bad value for attribute "${attribute.name}" on element "${tagName}": ${error.message}`,
135+
sourceCodeLocation.attrs[name].startOffset,
136+
sourceCodeLocation.attrs[name].endOffset,
137+
html
138+
)
139+
);
140+
141+
return;
142+
}
143+
144+
sourceSet = sourceSet.map((item) => ({
145+
source: c0ControlCodesExclude(item.source),
146+
}));
147+
148+
let searchFrom = name.length;
149+
150+
sourceSet.forEach((sourceItem) => {
151+
const { source } = sourceItem;
152+
153+
if (!isUrlRequestable(source.value, root)) {
154+
return;
155+
}
156+
157+
const startOffset =
158+
sourceCodeLocation.attrs[name].startOffset +
159+
target.indexOf(source.value, searchFrom);
160+
161+
searchFrom = target.indexOf(source.value, searchFrom) + 1;
162+
163+
sources.push({
164+
name,
165+
value: source.value,
166+
unquoted,
167+
startIndex: startOffset,
168+
endIndex: startOffset + source.value.length,
169+
});
170+
});
171+
172+
break;
173+
}
174+
}
175+
});
176+
});
177+
178+
parser5.end(html);
219179

220180
const imports = new Map();
221181
const replacements = new Map();
@@ -253,6 +213,7 @@ export default (options) =>
253213
const request = requestify(normalizedUrl, root);
254214
const newUrl = prefix ? `${prefix}!${request}` : request;
255215
const importKey = newUrl;
216+
256217
let importName = imports.get(importKey);
257218

258219
if (!importName) {

src/utils.js

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,3 +795,32 @@ export function getExportCode(html, options) {
795795

796796
return `// Exports\nmodule.exports = code;`;
797797
}
798+
799+
/**
 * Checks whether a string begins with a character that has to be stripped
 * from the edges of a URL attribute value.
 *
 * Matches the code points U+0001 through U+001F and the no-break space
 * (U+00A0). The upper bound is U+001F, the last C0 control code; the previous
 * bound of U+0019 left U+001A..U+001F (SUB, ESC, FS, GS, RS, US) unstripped.
 *
 * @param {string} character The string to inspect; only its first character matters.
 * @returns {boolean} `true` when the first character is strippable, `false`
 * otherwise (including for an empty string).
 */
function isASCIIC0group(character) {
  // C0 and &nbsp;
  // eslint-disable-next-line no-control-regex
  return /^[\u0001-\u001f\u00a0]/.test(character);
}
804+
805+
/**
 * Strips the characters recognised by `isASCIIC0group` from both ends of a
 * parsed source value and shifts its start index past the leading ones.
 *
 * @param {{ value: string, startIndex: number }} source Parsed source descriptor.
 * @returns {{ value: string, startIndex: number }} A new descriptor holding the
 * trimmed value and the start index advanced by one per leading character removed.
 * @throws {Error} When the value is empty, either on input or after trimming.
 */
export function c0ControlCodesExclude(source) {
  let trimmed = source.value;
  let offset = source.startIndex;

  if (!trimmed) {
    throw new Error('Must be non-empty');
  }

  // Drop leading characters one at a time, moving the start index along.
  while (isASCIIC0group(trimmed.charAt(0))) {
    offset += 1;
    trimmed = trimmed.slice(1);
  }

  // Drop trailing characters; the start index is unaffected by these.
  while (isASCIIC0group(trimmed.slice(-1))) {
    trimmed = trimmed.slice(0, -1);
  }

  // A value made up solely of strippable characters is rejected as empty.
  if (!trimmed) {
    throw new Error('Must be non-empty');
  }

  return { value: trimmed, startIndex: offset };
}

0 commit comments

Comments
 (0)