Skip to content

Commit 24b0427

Browse files
fix: parser tags and attributes according to spec (#243)
1 parent 3df909d commit 24b0427

File tree

8 files changed

+444
-50
lines changed

8 files changed

+444
-50
lines changed

src/plugins/attribute-plugin.js

Lines changed: 52 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,52 @@ function processMatch(match, strUntilValue, name, value, index) {
426426
});
427427
}
428428

429+
// https://html.spec.whatwg.org/multipage/syntax.html#syntax-tag-name
430+
const validTagName = '[A-Za-z0-9]+';
431+
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
432+
const validCustomElementName =
433+
'[a-z](?:[-.0-9_a-z\xB7\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF\u200C-\u200D\u203F-\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|[\uD800-\uDB7F][\uDC00-\uDFFF])*-(?:[-.0-9_a-z\xB7\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF\u200C-\u200D\u203F-\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|[\uD800-\uDB7F][\uDC00-\uDFFF])*';
434+
435+
const matchTagRegExp = `<((?:${validCustomElementName})|(?:${validTagName}))\\s+`;
436+
437+
// https://html.spec.whatwg.org/multipage/syntax.html#attributes-2
438+
const controls = '\u007F-\u009F';
439+
const invalid = ' "\'>/=';
440+
const noncharacter = '\uFDD0-\uFDEF\uFFFE\uFFFF\uD800-\uDFFF';
441+
const validAttribute = `[^${controls}${invalid}${noncharacter}]+`;
442+
443+
const validAttributeRegExp1 = `((${validAttribute})\\s*=\\s*")([^"]*)"`;
444+
const validAttributeRegExp2 = `((${validAttribute})\\s*=\\s*')([^']*)'`;
445+
const validAttributeRegExp3 = `((${validAttribute})\\s*=\\s*)([^\\s>]+)`;
446+
447+
function getParser() {
448+
const outside = {
449+
'<!--.*?-->': true,
450+
'<![CDATA[.*?]]>': true,
451+
'<[!\\?].*?>': true,
452+
'</[^>]+>': true,
453+
};
454+
455+
outside[matchTagRegExp] = function matchTag(match, tagName) {
456+
this.currentTag = tagName;
457+
458+
return 'inside';
459+
};
460+
461+
const inside = {
462+
// eat up whitespace
463+
'\\s+': true,
464+
// end of attributes
465+
'>': 'outside',
466+
};
467+
468+
inside[validAttributeRegExp1] = processMatch;
469+
inside[validAttributeRegExp2] = processMatch;
470+
inside[validAttributeRegExp3] = processMatch;
471+
472+
return new Parser({ outside, inside });
473+
}
474+
429475
export default (options) =>
430476
function process(html, result) {
431477
const tagsAndAttributes =
@@ -444,36 +490,19 @@ export default (options) =>
444490
]
445491
: options.attributes;
446492

447-
const parser = new Parser({
448-
outside: {
449-
'<!--.*?-->': true,
450-
'<![CDATA[.*?]]>': true,
451-
'<[!\\?].*?>': true,
452-
'</[^>]+>': true,
453-
'<([a-zA-Z\\-:]+)\\s*': function matchTag(match, tagName) {
454-
this.currentTag = tagName;
455-
456-
return 'inside';
457-
},
458-
},
459-
inside: {
460-
// eat up whitespace
461-
'\\s+': true,
462-
// end of attributes
463-
'>': 'outside',
464-
'(([0-9a-zA-Z\\-:]+)\\s*=\\s*")([^"]*)"': processMatch,
465-
"(([0-9a-zA-Z\\-:]+)\\s*=\\s*')([^']*)'": processMatch,
466-
'(([0-9a-zA-Z\\-:]+)\\s*=\\s*)([^\\s>]+)': processMatch,
467-
},
468-
});
469-
493+
const parser = getParser();
470494
const sources = parser.parse('outside', html, {
471495
currentTag: null,
472496
results: [],
473497
filter: (value) => {
474498
return isUrlRequest(value, options.root);
475499
},
476500
isRelevantTagAttribute: (tag, attribute) => {
501+
// eslint-disable-next-line no-param-reassign
502+
tag = tag.trim();
503+
// eslint-disable-next-line no-param-reassign
504+
attribute = attribute.trim();
505+
477506
return tagsAndAttributes.some((item) => {
478507
const pattern = new RegExp(`^${item}$`, 'i');
479508

test/__snapshots__/attributes-option.test.js.snap

Lines changed: 186 additions & 10 deletions
Large diffs are not rendered by default.

test/__snapshots__/esModule-option.test.js.snap

Lines changed: 51 additions & 3 deletions
Large diffs are not rendered by default.

test/__snapshots__/loader.test.js.snap

Lines changed: 17 additions & 1 deletion
Large diffs are not rendered by default.

test/__snapshots__/minimize-option.test.js.snap

Lines changed: 57 additions & 9 deletions
Large diffs are not rendered by default.

test/__snapshots__/root-option.test.js.snap

Lines changed: 34 additions & 2 deletions
Large diffs are not rendered by default.

test/attributes-option.test.js

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,22 @@ describe("'attributes' option", () => {
164164
});
165165

166166
it('should work with an "array" notations', async () => {
167-
const compiler = getCompiler('simple.js', { attributes: ['img:src'] });
167+
const compiler = getCompiler('simple.js', {
168+
attributes: [
169+
'img:src',
170+
'flag-icon:src',
171+
'MyStrangeTag13:src',
172+
'a-:src',
173+
'a-.:src',
174+
'a--:src',
175+
'aÀ-豈:src',
176+
'aÀ-Ⰰ:src',
177+
// Should not work
178+
'INVALID_TAG_NAME:src',
179+
// Should not work
180+
'invalid-CUSTOM-TAG:src',
181+
],
182+
});
168183
const stats = await compile(compiler);
169184

170185
expect(getModuleSource('./simple.html', stats)).toMatchSnapshot('module');
@@ -177,7 +192,21 @@ describe("'attributes' option", () => {
177192

178193
it('should work with multiple an "array" notations', async () => {
179194
const compiler = getCompiler('simple.js', {
180-
attributes: ['img:src', 'script:src'],
195+
attributes: [
196+
'img:src',
197+
'script:src',
198+
'flag-icon:src',
199+
'MyStrangeTag13:src',
200+
'a-:src',
201+
'a-.:src',
202+
'a--:src',
203+
'aÀ-豈:src',
204+
'aÀ-Ⰰ:src',
205+
// Should not work
206+
'INVALID_TAG_NAME:src',
207+
// Should not work
208+
'invalid-CUSTOM-TAG:src',
209+
],
181210
});
182211
const stats = await compile(compiler);
183212

test/fixtures/simple.html

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,19 @@ <h2>An Ordered HTML List</h2>
197197

198198
<img srcset="#test" src="#test" alt="Elva dressed as a fairy">
199199
<img src="image%20image.png" alt="Test">
200+
201+
<flag-icon src="image.png"></flag-icon>
202+
<MyStrangeTag13 src="image.png" alt="Smiley face" />
203+
<MyStrangeTag13 src="image.png" alt="Smiley face"> Test </MyStrangeTag13>
204+
<a- src="image.png"></a->
205+
<a-. src="image.png">test</a-.>
206+
<a-- src="image.png">test</a-->
207+
<aÀ-豈 src="image.png">test</aÀ-豈>
208+
<aÀ-Ⰰ src="image.png" />
209+
210+
<INVALID_TAG_NAME src="image.png" />
211+
<invalid-CUSTOM-TAG src="image.png" />
212+
213+
<p>Text</p>
214+
<p >Text</p>
215+
<p >Text</p>

0 commit comments

Comments
 (0)