From 641d9d1a53882c668d1fcaa3cec50fba05c85836 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Thu, 4 Jul 2024 21:45:07 +0900 Subject: [PATCH] fix: Extra content at the end of the document ## Why? XML with additional content at the end of the document is invalid. https://www.w3.org/TR/2006/REC-xml11-20060816/#document ``` [1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* ) ``` https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Misc ``` [27] Misc ::= Comment | PI | S ``` https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PI ``` [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' ``` https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-PITarget ``` [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) ``` --- lib/rexml/parsers/baseparser.rb | 13 ++++++ test/parser/test_base_parser.rb | 75 +++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 02759e70..8ebb38c9 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -146,6 +146,7 @@ def initialize( source ) self.stream = source @listeners = [] @prefixes = Set.new + @root_tag = nil end def add_listener( listener ) @@ -460,8 +461,12 @@ def pull_event @closed = tag @nsstack.shift else + if @tags.size.zero? && !@root_tag.nil? + raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source) + end @tags.push( tag ) end + @root_tag ||= tag return [ :start_element, tag, attributes ] end else @@ -469,6 +474,11 @@ def pull_event if text.chomp!("<") @source.position -= "<".bytesize end + if @tags.size.zero? && !@root_tag.nil? 
+ if text.strip != "" + raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source) + end + end return [ :text, text ] end rescue REXML::UndefinedNamespaceException @@ -635,6 +645,9 @@ def process_instruction(start_position) @source.position = start_position raise REXML::ParseException.new(message, @source) end + if @tags.size.zero? && !@root_tag.nil? && match_data[1] == "xml" + raise ParseException.new("Malformed XML: Extra XML declaration at the end of the document (got '') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed XML: Extra tag at the end of the document (got 'c') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed XML: Extra content at the end of the document (got 'c') +Line: 1 +Position: 8 +Last 80 unconsumed characters: + + DETAIL + end + + def test_parse_exception_for_extra_xml_declaration_at_the_end_of_the_document + exception = assert_raise(REXML::ParseException) do + parser = REXML::Parsers::BaseParser.new('') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed XML: Extra XML declaration at the end of the document (got '') + + events = {} + while parser.has_next? + event = parser.pull + events[event[0]] = event[1] + end + + assert_equal("abc", events[:processing_instruction]) + end + + def test_extra_comments_at_the_end_of_the_document + parser = REXML::Parsers::BaseParser.new('') + + events = {} + while parser.has_next? + event = parser.pull + events[event[0]] = event[1] + end + + assert_equal(" ok comment ", events[:comment]) + end end end