From 71c0b5768ac693595ae7a331ffea8b148362ba1b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 16 Oct 2024 17:06:37 -0400 Subject: [PATCH 1/4] test: update html5 parent test to not rely on changing select rules --- test/html5/test_api.rb | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/test/html5/test_api.rb b/test/html5/test_api.rb index 1294632eff..843550d750 100644 --- a/test/html5/test_api.rb +++ b/test/html5/test_api.rb @@ -319,17 +319,15 @@ def test_node_wrap end def test_node_wrap_uses_parent_node_as_parsing_context_node - doc = Nokogiri.HTML5("") - el = doc.at_css("option") + doc = Nokogiri.HTML5("

") + el = doc.at_css("p") - # fails to parse because `div` is not valid in the context of a `select` element - exception = assert_raises(RuntimeError) { el.wrap("
") } - assert_match(/Failed to parse .* in the context of a 'select' element/, exception.message) + exception = assert_raises(RuntimeError) { el.wrap("") } + assert_match(/Failed to parse .* in the context of a 'body' element/, exception.message) - # parses because `optgroup` is valid in the context of a `select` element - el.wrap("") - assert_equal("optgroup", el.parent.name) - assert_equal("select", el.parent.parent.name) + el.wrap("
") + assert_equal("div", el.parent.name) + assert_equal("body", el.parent.parent.name) end def test_parse_in_context_of_foreign_namespace From 0da25ee6a17d5cf68291c6aafaf7cb2978beb3ce Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 30 Oct 2024 11:23:17 -0400 Subject: [PATCH 2/4] test: update tests to reflect new select parsing rules - update gumbo tests - pin to https://github.com/html5lib/html5lib-tests/pull/178 branch --- gumbo-parser/test/parser.cc | 45 +++++++++++++++++++++++++------------ test/html5lib-tests | 2 +- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/gumbo-parser/test/parser.cc b/gumbo-parser/test/parser.cc index 6a46369bda..74843b5ad6 100644 --- a/gumbo-parser/test/parser.cc +++ b/gumbo-parser/test/parser.cc @@ -1017,17 +1017,29 @@ TEST_F(GumboParserTest, ComplicatedSelect) { GumboNode* body; GetAndAssertBody(root_, &body); - ASSERT_EQ(2, GetChildCount(body)); + ASSERT_EQ(1, GetChildCount(body)); GumboNode* select = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type); EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select)); - ASSERT_EQ(1, GetChildCount(select)); + ASSERT_EQ(2, GetChildCount(select)); + + GumboNode* div = GetChild(select, 0); + ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type); + EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div)); + + GumboVector* attributes = &div->v.element.attributes; + ASSERT_EQ(1, attributes->length); - GumboNode* optgroup = GetChild(select, 0); + GumboAttribute* klass = static_cast(attributes->data[0]); + EXPECT_EQ(GUMBO_ATTR_NAMESPACE_NONE, klass->attr_namespace); + EXPECT_STREQ("class", klass->name); + EXPECT_STREQ("foo", klass->value); + + GumboNode* optgroup = GetChild(select, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, optgroup->type); EXPECT_EQ(GUMBO_TAG_OPTGROUP, GetTag(optgroup)); - ASSERT_EQ(1, GetChildCount(optgroup)); + ASSERT_EQ(2, GetChildCount(optgroup)); GumboNode* option = GetChild(optgroup, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, option->type); @@ -1038,7 +1050,7 @@ TEST_F(GumboParserTest, ComplicatedSelect) { ASSERT_EQ(GUMBO_NODE_TEXT, text->type); EXPECT_STREQ("Option", text->v.text.text); - GumboNode* input = GetChild(body, 1); + GumboNode* input = GetChild(optgroup, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type); EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input)); ASSERT_EQ(0, GetChildCount(input)); @@ -1051,12 +1063,17 @@ TEST_F(GumboParserTest, DoubleSelect) { GetAndAssertBody(root_, &body); ASSERT_EQ(2, GetChildCount(body)); - GumboNode* select = GetChild(body, 0); - ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type); - EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select)); - ASSERT_EQ(0, GetChildCount(select)); + GumboNode* select1 = GetChild(body, 0); + ASSERT_EQ(GUMBO_NODE_ELEMENT, select1->type); + EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select1)); + ASSERT_EQ(0, GetChildCount(select1)); - GumboNode* div = GetChild(body, 1); + GumboNode* select2 = GetChild(body, 1); + ASSERT_EQ(GUMBO_NODE_ELEMENT, select2->type); + EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select2)); + ASSERT_EQ(1, GetChildCount(select2)); + + GumboNode* div = GetChild(select2, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type); EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div)); ASSERT_EQ(0, GetChildCount(div)); @@ -1067,19 +1084,19 @@ TEST_F(GumboParserTest, InputInSelect) { GumboNode* body; GetAndAssertBody(root_, &body); - ASSERT_EQ(3, GetChildCount(body)); + ASSERT_EQ(1, GetChildCount(body)); GumboNode* select = GetChild(body, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, select->type); EXPECT_EQ(GUMBO_TAG_SELECT, GetTag(select)); - ASSERT_EQ(0, GetChildCount(select)); + ASSERT_EQ(2, GetChildCount(select)); - GumboNode* input = GetChild(body, 1); + GumboNode* input = GetChild(select, 0); ASSERT_EQ(GUMBO_NODE_ELEMENT, input->type); EXPECT_EQ(GUMBO_TAG_INPUT, GetTag(input)); ASSERT_EQ(0, GetChildCount(input)); - GumboNode* div = GetChild(body, 2); + GumboNode* div = GetChild(select, 1); ASSERT_EQ(GUMBO_NODE_ELEMENT, div->type); EXPECT_EQ(GUMBO_TAG_DIV, GetTag(div)); ASSERT_EQ(0, GetChildCount(div)); diff --git a/test/html5lib-tests b/test/html5lib-tests index c67f90eaca..e770e89579 160000 --- a/test/html5lib-tests +++ b/test/html5lib-tests @@ -1 +1 @@ -Subproject commit c67f90eacac14e022b1f2c2e5ac559879581e9ff +Subproject commit e770e895797672d567e0de47eb17fcfcf0b6f809 From 9d56278015f1df377fc4a8194c5741bd139304f7 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 16 Oct 2024 15:33:59 -0400 Subject: [PATCH 3/4] feat: implement the new select parsing rules https://github.com/whatwg/html/pull/10557 This is green (modulo errors) up to whatwg/html@f8d14341 plus the correction I suggested to the spec in https://github.com/whatwg/html/pull/10557#discussion_r1893259445 --- gumbo-parser/src/insertion_mode.h | 2 - gumbo-parser/src/parser.c | 250 +++++------------------------- gumbo-parser/test/parser.cc | 7 +- test/html5lib-tests | 2 +- 4 files changed, 45 insertions(+), 216 deletions(-) diff --git a/gumbo-parser/src/insertion_mode.h b/gumbo-parser/src/insertion_mode.h index 6cb1d341f0..b60054e40b 100644 --- a/gumbo-parser/src/insertion_mode.h +++ b/gumbo-parser/src/insertion_mode.h @@ -20,8 +20,6 @@ typedef enum { GUMBO_INSERTION_MODE_IN_TABLE_BODY, GUMBO_INSERTION_MODE_IN_ROW, GUMBO_INSERTION_MODE_IN_CELL, - GUMBO_INSERTION_MODE_IN_SELECT, - GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE, GUMBO_INSERTION_MODE_IN_TEMPLATE, GUMBO_INSERTION_MODE_AFTER_BODY, GUMBO_INSERTION_MODE_IN_FRAMESET, diff --git a/gumbo-parser/src/parser.c b/gumbo-parser/src/parser.c index 06f096f8dd..85c8a509b4 100644 --- a/gumbo-parser/src/parser.c +++ b/gumbo-parser/src/parser.c @@ -670,21 +670,6 @@ static GumboInsertionMode get_appropriate_insertion_mode ( } switch (node->v.element.tag) { - case GUMBO_TAG_SELECT: { - if (is_last) { - return GUMBO_INSERTION_MODE_IN_SELECT; - } - for (int i = index; i > 0; --i) { - const GumboNode* ancestor = open_elements->data[i]; - if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) { - return GUMBO_INSERTION_MODE_IN_SELECT; - } - if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) { - return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE; - } - } - return GUMBO_INSERTION_MODE_IN_SELECT; - } case GUMBO_TAG_TD: case GUMBO_TAG_TH: if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL; @@ -1610,6 +1595,7 @@ static bool has_open_element(const GumboParser* parser, GumboTag tag) { TAG(TH), \ TAG(MARQUEE), \ TAG(OBJECT), \ + TAG(SELECT), \ TAG(TEMPLATE), \ TAG_MATHML(MI), \ TAG_MATHML(MO), \ @@ -1694,12 +1680,6 @@ static bool has_an_element_in_table_scope(const GumboParser* parser, GumboTag ta return has_an_element_in_specific_scope(parser, 1, &tag, false, &tags); } -// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-select-scope -static bool has_an_element_in_select_scope(const GumboParser* parser, GumboTag tag) { - static const TagSet tags = {TAG(OPTGROUP), TAG(OPTION)}; - return has_an_element_in_specific_scope(parser, 1, &tag, true, &tags); -} - // https://html.spec.whatwg.org/multipage/parsing.html#generate-implied-end-tags // "exception" is the "element to exclude from the process" listed in the spec. // Pass GUMBO_TAG_LAST to not exclude any of them. @@ -1804,18 +1784,6 @@ static void close_current_cell(GumboParser* parser, const GumboToken* token) { close_table_cell(parser, token, cell_tag); } -// This factors out the "act as if an end tag of tag name 'select' had been -// seen" clause of the spec, since it's referenced in several places. It pops -// all nodes from the stack until the current