From 1e1c60f8967332832f336ee3a662200bfede58c1 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sun, 22 Sep 2024 22:13:30 +0900 Subject: [PATCH 1/2] add namespace test case for SAX2Parser --- test/test_sax.rb | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/test/test_sax.rb b/test/test_sax.rb index 6aaeb618..caec983b 100644 --- a/test/test_sax.rb +++ b/test/test_sax.rb @@ -99,6 +99,52 @@ def test_sax2 end end + def test_without_namespace + xml = <<-XML + + + + + + XML + + parser = REXML::Parsers::SAX2Parser.new(xml) + elements = [] + parser.listen(:start_element) do |uri, localname, qname, attrs| + elements << [uri, localname, qname, attrs] + end + parser.parse + assert_equal([ + [nil, "root", "root", {}], + [nil, "a", "a", {"att1"=>"1", "att2"=>"2", "att3"=>"<"}], + [nil, "b", "b", {}] + ], elements) + end + + def test_with_namespace + xml = <<-XML + + + + + + XML + + parser = REXML::Parsers::SAX2Parser.new(xml) + elements = [] + parser.listen(:start_element) do |uri, localname, qname, attrs| + elements << [uri, localname, qname, attrs] + end + parser.parse + assert_equal([ + ["http://example.org/default", "root", "root", {"xmlns"=>"http://example.org/default", "xmlns:bar"=>"http://example.org/bar", "xmlns:foo"=>"http://example.org/foo"}], + ["http://example.org/default", "a", "a", {"att"=>"<", "bar:att"=>"2", "foo:att"=>"1"}], + ["http://example.org/bar", "b", "bar:b", {}] + ], elements) + end + class EntityExpansionLimitTest < Test::Unit::TestCase class GeneralEntityTest < self def test_have_value From 372ffb8c61cfb58dd00ee968fffb7acf242863ca Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sun, 22 Sep 2024 22:14:39 +0900 Subject: [PATCH 2/2] optimize SAX2Parser#get_namespace ``` RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.4/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin22] Calculating ------------------------------------- before after before(YJIT) after(YJIT) dom 18.085 17.677 33.086 32.778 i/s - 100.000 times in 5.529372s 5.657097s 3.022471s 3.050832s sax 25.450 26.182 44.797 47.916 i/s - 100.000 times in 3.929249s 3.819475s 2.232309s 2.086982s pull 29.160 29.089 55.407 53.531 i/s - 100.000 times in 3.429304s 3.437757s 1.804825s 1.868072s stream 29.137 29.055 52.780 51.368 i/s - 100.000 times in 3.432007s 3.441754s 1.894649s 1.946724s Comparison: dom before(YJIT): 33.1 i/s after(YJIT): 32.8 i/s - 1.01x slower before: 18.1 i/s - 1.83x slower after: 17.7 i/s - 1.87x slower sax after(YJIT): 47.9 i/s before(YJIT): 44.8 i/s - 1.07x slower after: 26.2 i/s - 1.83x slower before: 25.5 i/s - 1.88x slower pull before(YJIT): 55.4 i/s after(YJIT): 53.5 i/s - 1.04x slower before: 29.2 i/s - 1.90x slower after: 29.1 i/s - 1.90x slower stream before(YJIT): 52.8 i/s after(YJIT): 51.4 i/s - 1.03x slower before: 29.1 i/s - 1.81x slower after: 29.1 i/s - 1.82x slower ``` - sax - YJIT=ON : 1.07x faster - YJIT=OFF : 1.03x faster --- lib/rexml/parsers/sax2parser.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index 5452d4b8..a51477de 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -259,6 +259,8 @@ def add( pair ) end def get_namespace( prefix ) + return nil if @namespace_stack.empty? + uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) || (@namespace_stack.find { |ns| not ns[nil].nil? }) uris[-1][prefix] unless uris.nil? or 0 == uris.size