diff --git a/gumbo-parser/src/parser.c b/gumbo-parser/src/parser.c index edb7923c9a2..b68443d92cc 100644 --- a/gumbo-parser/src/parser.c +++ b/gumbo-parser/src/parser.c @@ -4418,6 +4418,7 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) { || token_has_attribute(token, "size") ) ) + || tag_in(token, kEndTag, &(const TagSet) { TAG(BR), TAG(P) }) ) { /* Parse error */ parser_add_parse_error(parser, token); @@ -4427,20 +4428,23 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) { * fragment parsing algorithm, then act as described in the "any other * start tag" entry below. */ - if (!is_fragment_parser(parser)) { - do { - pop_current_node(parser); - } while ( - !( - is_mathml_integration_point(get_current_node(parser)) - || is_html_integration_point(get_current_node(parser)) - || get_current_node(parser)->v.element.tag_namespace == GUMBO_NAMESPACE_HTML - ) - ); - parser->_parser_state->_reprocess_current_token = true; - return; + while ( + !( + is_mathml_integration_point(get_current_node(parser)) + || is_html_integration_point(get_current_node(parser)) + || get_current_node(parser)->v.element.tag_namespace == GUMBO_NAMESPACE_HTML + ) + ) { + pop_current_node(parser); } - // This is a start tag so the next if's then branch will be taken. + // XXX: The spec currently says to handle this using the in body insertion + // mode rules. That seems wrong. See + // . Instead, use the current + // insertion mode which seems like it works. + // + // handle_in_body(parser, token); + handle_html_content(parser, token); + return; } if (token->type == GUMBO_TOKEN_START_TAG) { diff --git a/gumbo-parser/test/parser.cc b/gumbo-parser/test/parser.cc index 81dc0d0bc4b..0832f919de7 100644 --- a/gumbo-parser/test/parser.cc +++ b/gumbo-parser/test/parser.cc @@ -2220,4 +2220,96 @@ TEST_F(GumboParserTest, FragmentWithoutForm) { EXPECT_EQ(0, GetChildCount(span)); } +TEST_F(GumboParserTest, ForeignFragment) { + ParseFragment("

", "svg", GUMBO_NAMESPACE_SVG); + EXPECT_EQ(1, GetChildCount(root_)); + GumboNode* html = GetChild(root_, 0); + ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type); + EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag); + EXPECT_EQ(2, GetChildCount(html)); + + ASSERT_EQ(2, GetChildCount(html)); + GumboNode* p = GetChild(html, 0); + ASSERT_EQ(GUMBO_NODE_ELEMENT, p->type); + ASSERT_EQ(GUMBO_TAG_P, p->v.element.tag); + ASSERT_EQ(GUMBO_NAMESPACE_HTML, p->v.element.tag_namespace); + + GumboNode* foo = GetChild(html, 1); + ASSERT_EQ(GUMBO_NODE_ELEMENT, foo->type); + ASSERT_EQ(std::string("foo"), foo->v.element.name); + ASSERT_EQ(GUMBO_NAMESPACE_SVG, foo->v.element.tag_namespace); +} + +TEST_F(GumboParserTest, FosterParenting) { + Parse("foobar

baz

quux"); + EXPECT_EQ(1, GetChildCount(root_)); + GumboNode* html = GetChild(root_, 0); + ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type); + EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag); + EXPECT_EQ(2, GetChildCount(html)); + + GumboNode* body = GetChild(html, 1); + ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type); + EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag); + EXPECT_EQ(4, GetChildCount(body)); + + GumboNode* svg = GetChild(body, 0); + ASSERT_EQ(GUMBO_NODE_ELEMENT, svg->type); + EXPECT_EQ(GUMBO_TAG_SVG, svg->v.element.tag); + EXPECT_EQ(GUMBO_NAMESPACE_SVG, svg->v.element.tag_namespace); + EXPECT_EQ(2, GetChildCount(svg)); + + GumboNode* g = GetChild(svg, 0); + ASSERT_EQ(GUMBO_NODE_ELEMENT, g->type); + EXPECT_EQ(std::string("g"), g->v.element.name); + EXPECT_EQ(GUMBO_NAMESPACE_SVG, g->v.element.tag_namespace); + EXPECT_EQ(1, GetChildCount(g)); + + GumboNode* text = GetChild(g, 0); + ASSERT_EQ(GUMBO_NODE_TEXT, text->type); + EXPECT_EQ(std::string("foo"), text->v.text.text); + + g = GetChild(svg, 1); + ASSERT_EQ(GUMBO_NODE_ELEMENT, g->type); + EXPECT_EQ(std::string("g"), g->v.element.name); + EXPECT_EQ(GUMBO_NAMESPACE_SVG, g->v.element.tag_namespace); + EXPECT_EQ(1, GetChildCount(g)); + + text = GetChild(g, 0); + ASSERT_EQ(GUMBO_NODE_TEXT, text->type); + EXPECT_EQ(std::string("bar"), text->v.text.text); + + GumboNode* p = GetChild(body, 1); + ASSERT_EQ(GUMBO_NODE_ELEMENT, p->type); + EXPECT_EQ(GUMBO_TAG_P, p->v.element.tag); + EXPECT_EQ(GUMBO_NAMESPACE_HTML, p->v.element.tag_namespace); + EXPECT_EQ(1, GetChildCount(p)); + + text = GetChild(p, 0); + ASSERT_EQ(GUMBO_NODE_TEXT, text->type); + EXPECT_EQ(std::string("baz"), text->v.text.text); + + GumboNode* table = GetChild(body, 2); + ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type); + EXPECT_EQ(GUMBO_TAG_TABLE, table->v.element.tag); + EXPECT_EQ(GUMBO_NAMESPACE_HTML, table->v.element.tag_namespace); + EXPECT_EQ(1, GetChildCount(table)); + + GumboNode* colgroup = GetChild(table, 0); + ASSERT_EQ(GUMBO_NODE_ELEMENT, colgroup->type); + EXPECT_EQ(GUMBO_TAG_COLGROUP, colgroup->v.element.tag); + EXPECT_EQ(GUMBO_NAMESPACE_HTML, colgroup->v.element.tag_namespace); + EXPECT_EQ(0, GetChildCount(colgroup)); + + p = GetChild(body, 3); + ASSERT_EQ(GUMBO_NODE_ELEMENT, p->type); + EXPECT_EQ(GUMBO_TAG_P, p->v.element.tag); + EXPECT_EQ(GUMBO_NAMESPACE_HTML, p->v.element.tag_namespace); + EXPECT_EQ(1, GetChildCount(p)); + + text = GetChild(p, 0); + ASSERT_EQ(GUMBO_NODE_TEXT, text->type); + EXPECT_EQ(std::string("quux"), text->v.text.text); +} + } // namespace diff --git a/test/html5/test_tree-construction.rb b/test/html5/test_tree-construction.rb index 191142c2dbd..37fdee20d4e 100644 --- a/test/html5/test_tree-construction.rb +++ b/test/html5/test_tree-construction.rb @@ -77,9 +77,14 @@ def parse_test(test_data) node[:name] = $~[1] node[:public_id] = $~[2].nil? || $~[2].empty? ? nil : $~[2] node[:system_id] = $~[3].nil? || $~[3].empty? ? nil : $~[3] - elsif /^$/ =~ node_text + elsif node_text.start_with?('') + index += 1 + node_text << "\n" + lines[index] + end node[:type] = :comment - node[:contents] = $~[1] + node[:contents] = node_text[5..-5] elsif /^<(svg |math )?(.+)>$/ =~ node_text node[:type] = :element node[:ns] = $~[1].nil? ? nil : $~[1].rstrip @@ -154,7 +159,7 @@ def compare_nodes(node, ng_node) assert_equal(attr[:value], value) end assert_equal(node[:children].length, ng_node.children.length, - "Element <#{node[:tag]}> has wrong number of children: #{ng_node.children.map { |c| c.name }}") + "Element <#{node[:tag]}> has wrong number of children #{ng_node.children.map { |c| c.name }} in #{@test[:data]}") when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE # We preserve the CDATA in the tree, but the tests represent it as text. assert_equal(node[:type], :text) @@ -167,7 +172,8 @@ def compare_nodes(node, ng_node) assert_equal(node[:children].length, ng_node.children.length) when Nokogiri::XML::Node::DOCUMENT_FRAG_NODE assert_equal(node[:type], :fragment) - assert_equal(node[:children].length, ng_node.children.length) + assert_equal(node[:children].length, ng_node.children.length, + "Fragment node has wrong number of children #{ng_node.children.map { |c| c.name }} in #{@test[:data]}") when Nokogiri::XML::Node::DTD_NODE assert_equal(node[:type], :doctype) assert_equal(node[:name], ng_node.name) @@ -212,7 +218,7 @@ def run_test end # Test the errors. - assert_equal(@test[:errors].length, doc.errors.length) + assert_equal(@test[:errors].length, doc.errors.length, "Wrong number of errors for #{@test[:data]}") # The new, standardized tokenizer errors live in @test[:new_errors]. Let's # match each one to exactly one error in doc.errors. Unfortunately, the @@ -224,7 +230,7 @@ def run_test errors.reject! { |err| err[:code] == "generic-parser" } error_regex = /^\((?\d+):(?\d+)(?:-\d+:\d+)?\) (?.*)$/ @test[:new_errors].each do |err| - assert_match(error_regex, err) + assert_match(error_regex, err, "New error format does not match: #{mu_pp(err)}") m = err.match(error_regex) line = m[:line].to_i column = m[:column].to_i @@ -236,7 +242,7 @@ def run_test end # This error should be the first error in the list. # refute_nil(idx, "Expected to find error #{code} at #{line}:#{column}") - assert_equal(0, idx, "Expected to find error #{code} at #{line}:#{column}") + assert_equal(0, idx, "Expected to find error #{code} at #{line}:#{column} in #{@test[:data]}") errors.delete_at(idx) end end diff --git a/test/html5lib-tests b/test/html5lib-tests index e379d7a17b1..535e74b4759 160000 --- a/test/html5lib-tests +++ b/test/html5lib-tests @@ -1 +1 @@ -Subproject commit e379d7a17b18cd1bb57aec4f62edec67578d294a +Subproject commit 535e74b4759d94fdc4038d2da9d6b70da6287614