diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 02759e70..8ebb38c9 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -146,6 +146,7 @@ def initialize( source ) self.stream = source @listeners = [] @prefixes = Set.new + @root_tag = nil end def add_listener( listener ) @@ -460,8 +461,12 @@ def pull_event @closed = tag @nsstack.shift else + if @tags.size.zero? && !@root_tag.nil? + raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source) + end @tags.push( tag ) end + @root_tag ||= tag return [ :start_element, tag, attributes ] end else @@ -469,6 +474,11 @@ def pull_event if text.chomp!("<") @source.position -= "<".bytesize end + if @tags.size.zero? && !@root_tag.nil? + if text.strip != "" + raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source) + end + end return [ :text, text ] end rescue REXML::UndefinedNamespaceException @@ -635,6 +645,9 @@ def process_instruction(start_position) @source.position = start_position raise REXML::ParseException.new(message, @source) end + if @tags.size.zero? && !@root_tag.nil? && match_data[1] == "xml" + raise ParseException.new("Malformed XML: Extra XML declaration at the end of the document (got '') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed XML: Extra tag at the end of the document (got 'c') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed XML: Extra content at the end of the document (got 'c') +Line: 1 +Position: 8 +Last 80 unconsumed characters: + + DETAIL + end + + def test_parse_exception_for_extra_xml_declaration_at_the_end_of_the_document + exception = assert_raise(REXML::ParseException) do + parser = REXML::Parsers::BaseParser.new('') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed XML: Extra XML declaration at the end of the document (got '') + + events = {} + while parser.has_next? + event = parser.pull + events[event[0]] = event[1] + end + + assert_equal("abc", events[:processing_instruction]) + end + + def test_extra_comments_at_the_end_of_the_document + parser = REXML::Parsers::BaseParser.new('') + + events = {} + while parser.has_next? + event = parser.pull + events[event[0]] = event[1] + end + + assert_equal(" ok comment ", events[:comment]) + end end end