From d90d7491bce2efc4169f664bebafec5c123f6efd Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Sat, 19 Oct 2024 12:55:57 +0200 Subject: [PATCH] Reduce encoding benchmark size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Profiling revealed that we were spending lots of time growing the buffer. Buffer operations are definitely something we want to optimize, but for this specific benchmark what we're interested in is UTF-8 scanning performance. Each iteration of the two scanning benchmarks was producing 20MB of JSON, now they only produce 5MB. Now: ``` == Encoding mostly utf8 (5001001 bytes) ruby 3.4.0dev (2024-10-18T19:01:45Z master 7be9a333ca) +YJIT +PRISM [arm64-darwin23] Warming up -------------------------------------- json 35.000 i/100ms oj 36.000 i/100ms rapidjson 10.000 i/100ms Calculating ------------------------------------- json 359.161 (± 1.4%) i/s (2.78 ms/i) - 1.820k in 5.068542s oj 359.699 (± 0.6%) i/s (2.78 ms/i) - 1.800k in 5.004291s rapidjson 99.687 (± 2.0%) i/s (10.03 ms/i) - 500.000 in 5.017321s Comparison: json: 359.2 i/s oj: 359.7 i/s - same-ish: difference falls within error rapidjson: 99.7 i/s - 3.60x slower ``` --- benchmark/encoder.rb | 8 +++----- ext/json/ext/generator/generator.c | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/benchmark/encoder.rb b/benchmark/encoder.rb index bd0105ae3..2b02d5d5e 100644 --- a/benchmark/encoder.rb +++ b/benchmark/encoder.rb @@ -59,11 +59,9 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ benchmark_encoding "small nested array", [[1,2,3,4,5]]*10 benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" } -# On this one we're a bit slower (~25%). -benchmark_encoding "mostly utf8", ([("€" * 3333)] * 2000), except: %i(json_state) - -# On these three benchmarks we perform well. 
Either on par or very closely faster/slower -benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 2000), except: %i(json_state) +# On these benchmarks we perform well. Either on par or very closely faster/slower +benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500), except: %i(json_state) +benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500), except: %i(json_state) benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state) benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json"), except: %i(json_state) diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index f38e340e7..d51013307 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -71,7 +71,7 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char esca } case 3: { unsigned char b2 = ptr[pos + 1]; - if (out_script_safe && b2 == 0x80) { + if (RB_UNLIKELY(out_script_safe && b2 == 0x80)) { unsigned char b3 = ptr[pos + 2]; if (b3 == 0xA8) { FLUSH_POS(3);