diff --git a/gumbo-parser/src/parser.c b/gumbo-parser/src/parser.c
index edb7923c9a2..b68443d92cc 100644
--- a/gumbo-parser/src/parser.c
+++ b/gumbo-parser/src/parser.c
@@ -4418,6 +4418,7 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|| token_has_attribute(token, "size")
)
)
+ || tag_in(token, kEndTag, &(const TagSet) { TAG(BR), TAG(P) })
) {
/* Parse error */
parser_add_parse_error(parser, token);
@@ -4427,20 +4428,23 @@ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
* fragment parsing algorithm, then act as described in the "any other
* start tag" entry below.
*/
- if (!is_fragment_parser(parser)) {
- do {
- pop_current_node(parser);
- } while (
- !(
- is_mathml_integration_point(get_current_node(parser))
- || is_html_integration_point(get_current_node(parser))
- || get_current_node(parser)->v.element.tag_namespace == GUMBO_NAMESPACE_HTML
- )
- );
- parser->_parser_state->_reprocess_current_token = true;
- return;
+ while (
+ !(
+ is_mathml_integration_point(get_current_node(parser))
+ || is_html_integration_point(get_current_node(parser))
+ || get_current_node(parser)->v.element.tag_namespace == GUMBO_NAMESPACE_HTML
+ )
+ ) {
+ pop_current_node(parser);
}
- // This is a start tag so the next if's then branch will be taken.
+ // XXX: The spec currently says to handle this using the in body insertion
+ // mode rules. That seems wrong. See
+ //
quux");
+ EXPECT_EQ(1, GetChildCount(root_));
+ GumboNode* html = GetChild(root_, 0);
+ ASSERT_EQ(GUMBO_NODE_ELEMENT, html->type);
+ EXPECT_EQ(GUMBO_TAG_HTML, html->v.element.tag);
+ EXPECT_EQ(2, GetChildCount(html));
+
+ GumboNode* body = GetChild(html, 1);
+ ASSERT_EQ(GUMBO_NODE_ELEMENT, body->type);
+ EXPECT_EQ(GUMBO_TAG_BODY, body->v.element.tag);
+ EXPECT_EQ(4, GetChildCount(body));
+
+ GumboNode* svg = GetChild(body, 0);
+ ASSERT_EQ(GUMBO_NODE_ELEMENT, svg->type);
+ EXPECT_EQ(GUMBO_TAG_SVG, svg->v.element.tag);
+ EXPECT_EQ(GUMBO_NAMESPACE_SVG, svg->v.element.tag_namespace);
+ EXPECT_EQ(2, GetChildCount(svg));
+
+ GumboNode* g = GetChild(svg, 0);
+ ASSERT_EQ(GUMBO_NODE_ELEMENT, g->type);
+ EXPECT_EQ(std::string("g"), g->v.element.name);
+ EXPECT_EQ(GUMBO_NAMESPACE_SVG, g->v.element.tag_namespace);
+ EXPECT_EQ(1, GetChildCount(g));
+
+ GumboNode* text = GetChild(g, 0);
+ ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+ EXPECT_EQ(std::string("foo"), text->v.text.text);
+
+ g = GetChild(svg, 1);
+ ASSERT_EQ(GUMBO_NODE_ELEMENT, g->type);
+ EXPECT_EQ(std::string("g"), g->v.element.name);
+ EXPECT_EQ(GUMBO_NAMESPACE_SVG, g->v.element.tag_namespace);
+ EXPECT_EQ(1, GetChildCount(g));
+
+ text = GetChild(g, 0);
+ ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+ EXPECT_EQ(std::string("bar"), text->v.text.text);
+
+ GumboNode* p = GetChild(body, 1);
+ ASSERT_EQ(GUMBO_NODE_ELEMENT, p->type);
+ EXPECT_EQ(GUMBO_TAG_P, p->v.element.tag);
+ EXPECT_EQ(GUMBO_NAMESPACE_HTML, p->v.element.tag_namespace);
+ EXPECT_EQ(1, GetChildCount(p));
+
+ text = GetChild(p, 0);
+ ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+ EXPECT_EQ(std::string("baz"), text->v.text.text);
+
+ GumboNode* table = GetChild(body, 2);
+ ASSERT_EQ(GUMBO_NODE_ELEMENT, table->type);
+ EXPECT_EQ(GUMBO_TAG_TABLE, table->v.element.tag);
+ EXPECT_EQ(GUMBO_NAMESPACE_HTML, table->v.element.tag_namespace);
+ EXPECT_EQ(1, GetChildCount(table));
+
+ GumboNode* colgroup = GetChild(table, 0);
+ ASSERT_EQ(GUMBO_NODE_ELEMENT, colgroup->type);
+ EXPECT_EQ(GUMBO_TAG_COLGROUP, colgroup->v.element.tag);
+ EXPECT_EQ(GUMBO_NAMESPACE_HTML, colgroup->v.element.tag_namespace);
+ EXPECT_EQ(0, GetChildCount(colgroup));
+
+ p = GetChild(body, 3);
+ ASSERT_EQ(GUMBO_NODE_ELEMENT, p->type);
+ EXPECT_EQ(GUMBO_TAG_P, p->v.element.tag);
+ EXPECT_EQ(GUMBO_NAMESPACE_HTML, p->v.element.tag_namespace);
+ EXPECT_EQ(1, GetChildCount(p));
+
+ text = GetChild(p, 0);
+ ASSERT_EQ(GUMBO_NODE_TEXT, text->type);
+ EXPECT_EQ(std::string("quux"), text->v.text.text);
+}
+
} // namespace
diff --git a/test/html5/test_tree-construction.rb b/test/html5/test_tree-construction.rb
index 191142c2dbd..37fdee20d4e 100644
--- a/test/html5/test_tree-construction.rb
+++ b/test/html5/test_tree-construction.rb
@@ -77,9 +77,14 @@ def parse_test(test_data)
node[:name] = $~[1]
node[:public_id] = $~[2].nil? || $~[2].empty? ? nil : $~[2]
node[:system_id] = $~[3].nil? || $~[3].empty? ? nil : $~[3]
- elsif /^<!-- (.*) -->$/ =~ node_text
+ elsif node_text.start_with?('')
+ index += 1
+ node_text << "\n" + lines[index]
+ end
node[:type] = :comment
- node[:contents] = $~[1]
+ node[:contents] = node_text[5..-5]
elsif /^<(svg |math )?(.+)>$/ =~ node_text
node[:type] = :element
node[:ns] = $~[1].nil? ? nil : $~[1].rstrip
@@ -154,7 +159,7 @@ def compare_nodes(node, ng_node)
assert_equal(attr[:value], value)
end
assert_equal(node[:children].length, ng_node.children.length,
- "Element <#{node[:tag]}> has wrong number of children: #{ng_node.children.map { |c| c.name }}")
+ "Element <#{node[:tag]}> has wrong number of children #{ng_node.children.map { |c| c.name }} in #{@test[:data]}")
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
# We preserve the CDATA in the tree, but the tests represent it as text.
assert_equal(node[:type], :text)
@@ -167,7 +172,8 @@ def compare_nodes(node, ng_node)
assert_equal(node[:children].length, ng_node.children.length)
when Nokogiri::XML::Node::DOCUMENT_FRAG_NODE
assert_equal(node[:type], :fragment)
- assert_equal(node[:children].length, ng_node.children.length)
+ assert_equal(node[:children].length, ng_node.children.length,
+ "Fragment node has wrong number of children #{ng_node.children.map { |c| c.name }} in #{@test[:data]}")
when Nokogiri::XML::Node::DTD_NODE
assert_equal(node[:type], :doctype)
assert_equal(node[:name], ng_node.name)
@@ -212,7 +218,7 @@ def run_test
end
# Test the errors.
- assert_equal(@test[:errors].length, doc.errors.length)
+ assert_equal(@test[:errors].length, doc.errors.length, "Wrong number of errors for #{@test[:data]}")
# The new, standardized tokenizer errors live in @test[:new_errors]. Let's
# match each one to exactly one error in doc.errors. Unfortunately, the
@@ -224,7 +230,7 @@ def run_test
errors.reject! { |err| err[:code] == "generic-parser" }
 error_regex = /^\((?<line>\d+):(?<column>\d+)\) (?<code>.*)$/
@test[:new_errors].each do |err|
- assert_match(error_regex, err)
+ assert_match(error_regex, err, "New error format does not match: #{mu_pp(err)}")
m = err.match(error_regex)
line = m[:line].to_i
column = m[:column].to_i
@@ -236,7 +242,7 @@ def run_test
end
# This error should be the first error in the list.
# refute_nil(idx, "Expected to find error #{code} at #{line}:#{column}")
- assert_equal(0, idx, "Expected to find error #{code} at #{line}:#{column}")
+ assert_equal(0, idx, "Expected to find error #{code} at #{line}:#{column} in #{@test[:data]}")
errors.delete_at(idx)
end
end
diff --git a/test/html5lib-tests b/test/html5lib-tests
index e379d7a17b1..535e74b4759 160000
--- a/test/html5lib-tests
+++ b/test/html5lib-tests
@@ -1 +1 @@
-Subproject commit e379d7a17b18cd1bb57aec4f62edec67578d294a
+Subproject commit 535e74b4759d94fdc4038d2da9d6b70da6287614