Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for detecting invalid XML that has unsupported content before root element #184

Merged
merged 1 commit into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -486,11 +486,15 @@ def pull_event
if text.chomp!("<")
@source.position -= "<".bytesize
end
if @tags.empty? and @have_root
if @tags.empty?
unless /\A\s*\z/.match?(text)
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
if @have_root
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
else
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
end
end
return pull_event
return pull_event if @have_root
end
return [ :text, text ]
end
Expand Down
12 changes: 12 additions & 0 deletions test/parse/test_comment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,18 @@ def test_after_doctype_malformed_comment_end
end
end

# A comment that appears ahead of the root element must be accepted and
# surfaced by the pull parser as a :comment event.
def test_before_root
  source_parser = REXML::Parsers::BaseParser.new('<!-- ok comment --><a></a>')

  # Keep the payload (second slot) of each event, keyed by event type;
  # a later event of the same type overwrites an earlier one.
  payload_by_type = {}
  while source_parser.has_next?
    type, payload = source_parser.pull
    payload_by_type[type] = payload
  end

  assert_equal(" ok comment ", payload_by_type[:comment])
end

def test_after_root
parser = REXML::Parsers::BaseParser.new('<a></a><!-- ok comment -->')

Expand Down
43 changes: 24 additions & 19 deletions test/parse/test_processing_instruction.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,6 @@ def test_no_name
DETAIL
end

# Parses a document with stray text ahead of the first processing
# instruction and checks the target/content of the second child node.
def test_garbage_text
  # TODO: This should be parse error.
  # Create test/parse/test_document.rb or something and move this to it.
  document = parse(<<~XML)
    x<?x y
    <!--?><?x -->?>
    <r/>
  XML
  instruction = document.children[1]
  expected = ["x", "y\n<!--"]
  actual = [instruction.target, instruction.content]
  assert_equal(expected, actual)
end

def test_xml_declaration_not_at_document_start
exception = assert_raise(REXML::ParseException) do
parser = REXML::Parsers::BaseParser.new('<a><?xml version="1.0" ?></a>')
Expand All @@ -62,6 +43,30 @@ def test_xml_declaration_not_at_document_start
end
end

# Comment delimiters that appear inside processing instructions are part
# of the instruction content: the first two children of the parsed
# document are expected to be instructions with the contents below.
def test_comment
  document = parse(<<~XML)
    <?x y
    <!--?><?x -->?>
    <r/>
  XML
  expected = [
    ["x", "y\n<!--"],
    ["x", "-->"],
  ]
  actual = [0, 1].map do |index|
    instruction = document.children[index]
    [instruction.target, instruction.content]
  end
  assert_equal(expected, actual)
end

# A processing instruction placed ahead of the root element must be
# accepted and surfaced as a :processing_instruction event whose first
# payload slot is the instruction target.
def test_before_root
  source_parser = REXML::Parsers::BaseParser.new('<?abc version="1.0" ?><a></a>')

  # Keep the second slot of each event, keyed by event type; a later
  # event of the same type overwrites an earlier one.
  payload_by_type = {}
  while source_parser.has_next?
    type, payload = source_parser.pull
    payload_by_type[type] = payload
  end

  assert_equal("abc", payload_by_type[:processing_instruction])
end

def test_after_root
parser = REXML::Parsers::BaseParser.new('<a></a><?abc version="1.0" ?>')

Expand Down
17 changes: 17 additions & 0 deletions test/parse/test_text.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,23 @@
module REXMLTests
class TestParseText < Test::Unit::TestCase
class TestInvalid < self
# Non-whitespace text ahead of the root element is malformed XML: draining
# the parser must raise REXML::ParseException with the message below.
def test_before_root
  error = assert_raise(REXML::ParseException) do
    parser = REXML::Parsers::BaseParser.new('b<a></a>')
    parser.pull while parser.has_next?
  end

  expected_message = <<~DETAIL.chomp
    Malformed XML: Content at the start of the document (got 'b')
    Line: 1
    Position: 4
    Last 80 unconsumed characters:
    <a>
  DETAIL
  assert_equal(expected_message, error.to_s)
end

def test_after_root
exception = assert_raise(REXML::ParseException) do
parser = REXML::Parsers::BaseParser.new('<a></a>c')
Expand Down