From 71a4f8240270fd7351a609f50af3ca7674f3d620 Mon Sep 17 00:00:00 2001
From: NAITOH Jun
Date: Thu, 20 Jun 2024 08:17:55 +0900
Subject: [PATCH] If the size of the content parsed by StringScanner to parse huge XML exceeds a certain size, have it removed.

See: https://github.com/ruby/rexml/issues/150

---------

Co-authored-by: Sutou Kouhei
---
 lib/rexml/parsers/baseparser.rb |  2 ++
 lib/rexml/source.rb             |  7 +++++++
 test/parser/test_base_parser.rb | 27 +++++++++++++++++++++++++++
 3 files changed, 36 insertions(+)
 create mode 100644 test/parser/test_base_parser.rb

diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index 5791ab1d..a003ac29 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -204,6 +204,8 @@ def peek depth=0
 
       # Returns the next event. This is a +PullEvent+ object.
       def pull
+        @source.drop_parsed_content
+
         pull_event.tap do |event|
           @listeners.each do |listener|
             listener.receive event
diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb
index 67154832..216e6157 100644
--- a/lib/rexml/source.rb
+++ b/lib/rexml/source.rb
@@ -55,6 +55,7 @@ class Source
     attr_reader :encoding
 
     module Private
+      SCANNER_RESET_SIZE = 100000
       PRE_DEFINED_TERM_PATTERNS = {}
       pre_defined_terms = ["'", '"', "<"]
       pre_defined_terms.each do |term|
@@ -84,6 +85,12 @@ def buffer
       @scanner.rest
     end
 
+    def drop_parsed_content
+      if @scanner.pos > SCANNER_RESET_SIZE
+        @scanner.string = @scanner.rest
+      end
+    end
+
     def buffer_encoding=(encoding)
       @scanner.string.force_encoding(encoding)
     end
diff --git a/test/parser/test_base_parser.rb b/test/parser/test_base_parser.rb
new file mode 100644
index 00000000..17d01979
--- /dev/null
+++ b/test/parser/test_base_parser.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: false
+
+require 'rexml/parsers/baseparser'
+
+module REXMLTests
+  class BaseParserTester < Test::Unit::TestCase
+    def test_large_xml
+      large_text = "a" * 100_000
+      xml = <<-XML
+
+
+          #{large_text}
+          #{large_text}
+
+      XML
+
+      parser = REXML::Parsers::BaseParser.new(xml)
+      while parser.has_next?
+        parser.pull
+      end
+
+      assert do
+        parser.position < xml.bytesize
+      end
+    end
+  end
+end