From d90d7491bce2efc4169f664bebafec5c123f6efd Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 19 Oct 2024 12:55:57 +0200 Subject: [PATCH] Reduce encoding benchmark size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Profiling revealed that we were spending lots of time growing the buffer. Buffer operations are definitely something we want to optimize, but for this specific benchmark what we're interested in is UTF-8 scanning performance. Each iteration of the two scanning benchmarks was producing 20MB of JSON, now they only produce 5MB. Now: ``` == Encoding mostly utf8 (5001001 bytes) ruby 3.4.0dev (2024-10-18T19:01:45Z master 7be9a333ca) +YJIT +PRISM [arm64-darwin23] Warming up -------------------------------------- json 35.000 i/100ms oj 36.000 i/100ms rapidjson 10.000 i/100ms Calculating ------------------------------------- json 359.161 (± 1.4%) i/s (2.78 ms/i) - 1.820k in 5.068542s oj 359.699 (± 0.6%) i/s (2.78 ms/i) - 1.800k in 5.004291s rapidjson 99.687 (± 2.0%) i/s (10.03 ms/i) - 500.000 in 5.017321s Comparison: json: 359.2 i/s oj: 359.7 i/s - same-ish: difference falls within error rapidjson: 99.7 i/s - 3.60x slower ``` --- benchmark/encoder.rb | 8 +++----- ext/json/ext/generator/generator.c | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/benchmark/encoder.rb b/benchmark/encoder.rb index bd0105ae3..2b02d5d5e 100644 --- a/benchmark/encoder.rb +++ b/benchmark/encoder.rb @@ -59,11 +59,9 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ benchmark_encoding "small nested array", [[1,2,3,4,5]]*10 benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" } -# On this one we're a bit slower (~25%). -benchmark_encoding "mostly utf8", ([("€" * 3333)] * 2000), except: %i(json_state) - -# On these three benchmarks we perform well. 
Either on par or very closely faster/slower -benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 2000), except: %i(json_state) +# On these benchmarks we perform well. Either on par or very closely faster/slower +benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500), except: %i(json_state) +benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500), except: %i(json_state) benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state) benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json"), except: %i(json_state) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index f38e340e7..d51013307 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -71,7 +71,7 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char esca } case 3: { unsigned char b2 = ptr[pos + 1]; - if (out_script_safe && b2 == 0x80) { + if (RB_UNLIKELY(out_script_safe && b2 == 0x80)) { unsigned char b3 = ptr[pos + 2]; if (b3 == 0xA8) { FLUSH_POS(3);