From 4e9de514b0525f3e3ef32227377acee02e384a90 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Thu, 4 Jul 2024 21:45:07 +0900 Subject: [PATCH] fix: Extra content at the end of the document ## Why? XML with additional content at the end of the document is invalid. https://www.w3.org/TR/2006/REC-xml11-20060816/#document ``` [1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* ) ``` https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Misc ``` [27] Misc ::= Comment | PI | S ``` https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PI ``` [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' ``` https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PITarget ``` [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) ``` --- lib/rexml/parsers/baseparser.rb | 9 ++++++ test/parse/test_comment.rb | 12 ++++++++ test/parse/test_element.rb | 34 +++++++++++++++++++++++ test/parse/test_processing_instruction.rb | 12 ++++++++ test/parse/test_text.rb | 25 +++++++++++++++++ 5 files changed, 92 insertions(+) create mode 100644 test/parse/test_text.rb diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 02759e70..1b3aff5a 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -460,8 +460,12 @@ def pull_event @closed = tag @nsstack.shift else + if @tags.empty? and @have_root + raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source) + end @tags.push( tag ) end + @have_root = true return [ :start_element, tag, attributes ] end else @@ -469,6 +473,11 @@ def pull_event if text.chomp!("<") @source.position -= "<".bytesize end + if @tags.empty? 
and @have_root + if text.strip != "" + raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source) + end + end return [ :text, text ] end rescue REXML::UndefinedNamespaceException diff --git a/test/parse/test_comment.rb b/test/parse/test_comment.rb index ce6678e8..b90b1038 100644 --- a/test/parse/test_comment.rb +++ b/test/parse/test_comment.rb @@ -105,5 +105,17 @@ def test_after_doctype_malformed_comment_end DETAIL end end + + def test_extra_comments_at_the_end_of_the_document + parser = REXML::Parsers::BaseParser.new('') + + events = {} + while parser.has_next? + event = parser.pull + events[event[0]] = event[1] + end + + assert_equal(" ok comment ", events[:comment]) + end end end diff --git a/test/parse/test_element.rb b/test/parse/test_element.rb index 14d0703a..f0fb5ee0 100644 --- a/test/parse/test_element.rb +++ b/test/parse/test_element.rb @@ -85,6 +85,40 @@ def test_garbage_less_than_slash_before_end_tag_at_line_start DETAIL end + + def test_multiple_root_elements + exception = assert_raise(REXML::ParseException) do + parser = REXML::Parsers::BaseParser.new('') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed XML: Extra tag at the end of the document (got '') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed XML: Extra tag at the end of the document (got '') + + events = {} + while parser.has_next? 
+ event = parser.pull + events[event[0]] = event[1] + end + + assert_equal("abc", events[:processing_instruction]) + end end end diff --git a/test/parse/test_text.rb b/test/parse/test_text.rb new file mode 100644 index 00000000..ca21ec7a --- /dev/null +++ b/test/parse/test_text.rb @@ -0,0 +1,25 @@ +require "test/unit" +require 'rexml/parsers/baseparser' + +module REXMLTests + class TestParseText < Test::Unit::TestCase + class TestInvalid < self + def test_extra_content_at_the_end_of_the_document + exception = assert_raise(REXML::ParseException) do + parser = REXML::Parsers::BaseParser.new('c') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed XML: Extra content at the end of the document (got 'c') + Line: 1 + Position: 8 + Last 80 unconsumed characters: + + DETAIL + end + end + end +end