From ebc3e85bfa2796fb4922c1932760bec8390ff87c Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Mon, 8 Jul 2024 05:54:06 +0900 Subject: [PATCH] Add position check for XML declaration (#162) ## Why? XML declaration must be the first item. https://www.w3.org/TR/2006/REC-xml11-20060816/#document ``` [1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* ) ``` https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-prolog ``` [22] prolog ::= XMLDecl Misc* (doctypedecl Misc*)? ``` https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-XMLDecl ``` [23] XMLDecl ::= '' ``` See: https://github.com/ruby/rexml/pull/161#discussion_r1666118193 --- lib/rexml/parsers/baseparser.rb | 5 ++++- test/parse/test_processing_instruction.rb | 17 +++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 900c19cc..2a448e13 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -644,7 +644,10 @@ def process_instruction(start_position) @source.position = start_position raise REXML::ParseException.new(message, @source) end - if @document_status.nil? and match_data[1] == "xml" + if match_data[1] == "xml" + if @document_status + raise ParseException.new("Malformed XML: XML declaration is not at the start", @source) + end content = match_data[2] version = VERSION.match(content) version = version[1] unless version.nil? diff --git a/test/parse/test_processing_instruction.rb b/test/parse/test_processing_instruction.rb index 40dadd11..13384935 100644 --- a/test/parse/test_processing_instruction.rb +++ b/test/parse/test_processing_instruction.rb @@ -39,6 +39,23 @@ def test_garbage_text pi.content, ]) end + + def test_xml_declaration_not_at_document_start + exception = assert_raise(REXML::ParseException) do + parser = REXML::Parsers::BaseParser.new('') + while parser.has_next? + parser.pull + end + end + + assert_equal(<<~DETAIL.chomp, exception.to_s) + Malformed XML: XML declaration is not at the start + Line: 1 + Position: 25 + Last 80 unconsumed characters: + + DETAIL + end end def test_after_root