From 776993ade56205969c17714c4ef4dd4788e7b8ce Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 8 Jan 2025 16:23:19 -0600 Subject: [PATCH 01/24] Make benchmark runnable without oj available --- benchmark/encoder.rb | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/benchmark/encoder.rb b/benchmark/encoder.rb index 5f3de6f5..58f30f14 100644 --- a/benchmark/encoder.rb +++ b/benchmark/encoder.rb @@ -1,9 +1,14 @@ require "benchmark/ips" require "json" require "date" -require "oj" -Oj.default_options = Oj.default_options.merge(mode: :compat) +begin + require "oj" + + Oj.default_options = Oj.default_options.merge(mode: :compat) +rescue LoadError + # no oj, just do json +end if ENV["ONLY"] RUN = ENV["ONLY"].split(/[,: ]/).map{|x| [x.to_sym, true] }.to_h @@ -16,11 +21,11 @@ end def implementations(ruby_obj) - state = JSON::State.new(JSON.dump_default_options) - { - json: ["json", proc { JSON.generate(ruby_obj) }], - oj: ["oj", proc { Oj.dump(ruby_obj) }], - } + impls = { json: ["json", proc { JSON.generate(ruby_obj) }] } + if defined? Oj + impls[:oj] = ["oj", proc { Oj.dump(ruby_obj) }] + end + impls end def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: []) From be45c9a2362ddf1bd77276b17bb04e31344a35b8 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 8 Jan 2025 18:31:43 -0600 Subject: [PATCH 02/24] Port convert_UTF8_to_ASCII_only_JSON to Java This is new specialized logic to reduce overhead when appending ASCII-only strings to the generated JSON. 
Original code by @byroot See #620 --- java/src/json/ext/Generator.java | 5 + java/src/json/ext/StringEncoder.java | 188 +++++++++++++++++++++++++-- 2 files changed, 180 insertions(+), 13 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 4ab92805..1da173b1 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -438,6 +438,9 @@ void generate(ThreadContext context, Session session, RubyString object, OutputS StringEncoder stringEncoder = session.getStringEncoder(context); ByteList byteList = object.getByteList(); + stringEncoder.init(byteList); + stringEncoder.out = buffer; + stringEncoder.append('"'); switch (object.scanForCodeRange()) { case StringSupport.CR_7BIT: stringEncoder.encodeASCII(context, byteList, buffer); @@ -448,6 +451,8 @@ void generate(ThreadContext context, Session session, RubyString object, OutputS default: throw Utils.buildGeneratorError(context, object, "source sequence is illegal/malformed utf-8").toThrowable(); } + stringEncoder.quoteStop(stringEncoder.pos); + stringEncoder.append('"'); } }; diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index 68fd81e3..2ab4696a 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -18,9 +18,82 @@ * and throws a GeneratorError if any problem is found. 
*/ final class StringEncoder extends ByteListTranscoder { + private static final int CHAR_LENGTH_MASK = 7; + private static final byte[] BACKSLASH_DOUBLEQUOTE = {'\\', '"'}; + private static final byte[] BACKSLASH_BACKSLASH = {'\\', '\\'}; + private static final byte[] BACKSLASH_FORWARDSLASH = {'\\', '/'}; + private static final byte[] BACKSLASH_B = {'\\', 'b'}; + private static final byte[] BACKSLASH_F = {'\\', 'f'}; + private static final byte[] BACKSLASH_N = {'\\', 'n'}; + private static final byte[] BACKSLASH_R = {'\\', 'r'}; + private static final byte[] BACKSLASH_T = {'\\', 't'}; + + static final byte[] ESCAPE_TABLE = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + static final byte[] ASCII_ONLY_ESCAPE_TABLE = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First 
byte of a 3-byte code point + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, + }; + + static final byte[] SCRIPT_SAFE_ESCAPE_TABLE = { + // ASCII Control Characters + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + // ASCII Characters + 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Continuation byte + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // First byte of a 2-byte code point + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // First byte of a 3-byte code point + 3, 3, 11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029 + //First byte of a 4+ byte code point + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, + }; + private final boolean asciiOnly, scriptSafe; - private OutputStream out; + OutputStream out; // Escaped characters will reuse this array, to avoid new allocations // or appending them byte-by-byte @@ -48,25 +121,114 @@ final class StringEncoder extends ByteListTranscoder { } void encode(ThreadContext context, ByteList src, OutputStream out) throws IOException { - init(src); - this.out = out; - append('"'); while (hasNext()) { handleChar(readUtf8Char(context)); } - quoteStop(pos); - append('"'); } + // C: convert_UTF8_to_ASCII_only_JSON void encodeASCII(ThreadContext context, ByteList src, OutputStream out) throws IOException { - init(src); - this.out = out; - append('"'); - while (hasNext()) { - 
handleChar(readASCIIChar()); + byte[] escape_table = scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ASCII_ONLY_ESCAPE_TABLE; + byte[] hexdig = HEX; + byte[] scratch = aux; + + byte[] ptrBytes = src.unsafeBytes(); + int ptr = src.begin(); + int len = src.realSize(); + + int beg = 0; + int pos = 0; + + while (pos < len) { + byte ch = ptrBytes[ptr + pos]; + int ch_len = escape_table[ch]; + + if (ch_len != 0) { + switch (ch_len) { + case 9: { + if (pos > beg) { append(ptrBytes, ptr + beg, pos - beg); } pos += 1; beg = pos; // FLUSH_POS + switch (ch) { + case '"': appendEscape(BACKSLASH_DOUBLEQUOTE); break; + case '\\': appendEscape(BACKSLASH_BACKSLASH); break; + case '/': appendEscape(BACKSLASH_FORWARDSLASH); break; + case '\b': appendEscape(BACKSLASH_B); break; + case '\f': appendEscape(BACKSLASH_F); break; + case '\n': appendEscape(BACKSLASH_N); break; + case '\r': appendEscape(BACKSLASH_R); break; + case '\t': appendEscape(BACKSLASH_T); break; + default: { + scratch[2] = '0'; + scratch[3] = '0'; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + append(scratch, 0, 6); + break; + } + } + break; + } + default: { + int wchar = 0; + ch_len = ch_len & CHAR_LENGTH_MASK; + + switch(ch_len) { + case 2: + wchar = ptrBytes[ptr + pos] & 0x1F; + break; + case 3: + wchar = ptrBytes[ptr + pos] & 0x0F; + break; + case 4: + wchar = ptrBytes[ptr + pos] & CHAR_LENGTH_MASK; + break; + } + + for (short i = 1; i < ch_len; i++) { + wchar = (wchar << 6) | (ptrBytes[ptr + pos +i] & 0x3F); + } + + if (pos > beg) { append(ptrBytes, ptr + beg, pos - beg); } pos += ch_len; beg = pos; // FLUSH_POS + + if (wchar <= 0xFFFF) { + scratch[2] = hexdig[wchar >> 12]; + scratch[3] = hexdig[(wchar >> 8) & 0xf]; + scratch[4] = hexdig[(wchar >> 4) & 0xf]; + scratch[5] = hexdig[wchar & 0xf]; + append(scratch, 0, 6); + } else { + int hi, lo; + wchar -= 0x10000; + hi = 0xD800 + (wchar >> 10); + lo = 0xDC00 + (wchar & 0x3FF); + + scratch[2] = hexdig[hi >> 12]; + scratch[3] = hexdig[(hi >> 8) & 
0xf]; + scratch[4] = hexdig[(hi >> 4) & 0xf]; + scratch[5] = hexdig[hi & 0xf]; + + scratch[8] = hexdig[lo >> 12]; + scratch[9] = hexdig[(lo >> 8) & 0xf]; + scratch[10] = hexdig[(lo >> 4) & 0xf]; + scratch[11] = hexdig[lo & 0xf]; + + append(scratch, 0, 12); + } + + break; + } + } + } else { + pos++; + } + } + + if (beg < len) { + append(ptrBytes, ptr + beg, len - beg); } - quoteStop(pos); - append('"'); + } + + private void appendEscape(byte[] escape) throws IOException { + append(escape, 0, 2); } protected void append(int b) throws IOException { From 4d37e9fee12fa3f8cb819561729afcdb2e9cc348 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 8 Jan 2025 18:39:20 -0600 Subject: [PATCH 03/24] Align string generate method with generate_json_string --- java/src/json/ext/Generator.java | 49 +---------------------- java/src/json/ext/StringEncoder.java | 58 ++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 48 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 1da173b1..457e0a5d 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -428,57 +428,10 @@ int guessSize(ThreadContext context, Session session, RubyString object) { @Override void generate(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException { - try { - object = ensureValidEncoding(context, object); - } catch (RaiseException re) { - RubyException exc = Utils.buildGeneratorError(context, object, re.getMessage()); - exc.setCause(re.getException()); - throw exc.toThrowable(); - } - - StringEncoder stringEncoder = session.getStringEncoder(context); - ByteList byteList = object.getByteList(); - stringEncoder.init(byteList); - stringEncoder.out = buffer; - stringEncoder.append('"'); - switch (object.scanForCodeRange()) { - case StringSupport.CR_7BIT: - stringEncoder.encodeASCII(context, byteList, buffer); - break; - case StringSupport.CR_VALID: - 
stringEncoder.encode(context, byteList, buffer); - break; - default: - throw Utils.buildGeneratorError(context, object, "source sequence is illegal/malformed utf-8").toThrowable(); - } - stringEncoder.quoteStop(stringEncoder.pos); - stringEncoder.append('"'); + session.getStringEncoder(context).generate(context, object, buffer); } }; - static RubyString ensureValidEncoding(ThreadContext context, RubyString str) { - Encoding encoding = str.getEncoding(); - RubyString utf8String; - if (!(encoding == USASCIIEncoding.INSTANCE || encoding == UTF8Encoding.INSTANCE)) { - if (encoding == ASCIIEncoding.INSTANCE) { - utf8String = str.strDup(context.runtime); - utf8String.setEncoding(UTF8Encoding.INSTANCE); - switch (utf8String.getCodeRange()) { - case StringSupport.CR_7BIT: - return utf8String; - case StringSupport.CR_VALID: - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Raise in 3.0.0 - context.runtime.getWarnings().warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); - return utf8String; - } - } - - str = (RubyString) str.encode(context, context.runtime.getEncodingService().convertEncodingToRubyEncoding(UTF8Encoding.INSTANCE)); - } - return str; - } - static final Handler TRUE_HANDLER = new KeywordHandler<>("true"); static final Handler FALSE_HANDLER = diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index 2ab4696a..ab45a145 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -5,9 +5,16 @@ */ package json.ext; +import org.jcodings.Encoding; +import org.jcodings.specific.ASCIIEncoding; +import org.jcodings.specific.USASCIIEncoding; +import org.jcodings.specific.UTF8Encoding; +import org.jruby.RubyException; +import org.jruby.RubyString; import org.jruby.exceptions.RaiseException; import org.jruby.runtime.ThreadContext; import org.jruby.util.ByteList; +import org.jruby.util.StringSupport; 
import java.io.IOException; import java.io.OutputStream; @@ -120,6 +127,57 @@ final class StringEncoder extends ByteListTranscoder { this.scriptSafe = scriptSafe; } + // C: generate_json_string + void generate(ThreadContext context, RubyString object, OutputStream buffer) throws IOException { + try { + object = ensureValidEncoding(context, object); + } catch (RaiseException re) { + RubyException exc = Utils.buildGeneratorError(context, object, re.getMessage()); + exc.setCause(re.getException()); + throw exc.toThrowable(); + } + + ByteList byteList = object.getByteList(); + init(byteList); + out = buffer; + append('"'); + switch (object.scanForCodeRange()) { + case StringSupport.CR_7BIT: + encodeASCII(context, byteList, buffer); + break; + case StringSupport.CR_VALID: + encode(context, byteList, buffer); + break; + default: + throw Utils.buildGeneratorError(context, object, "source sequence is illegal/malformed utf-8").toThrowable(); + } + quoteStop(pos); + append('"'); + } + + static RubyString ensureValidEncoding(ThreadContext context, RubyString str) { + Encoding encoding = str.getEncoding(); + RubyString utf8String; + if (!(encoding == USASCIIEncoding.INSTANCE || encoding == UTF8Encoding.INSTANCE)) { + if (encoding == ASCIIEncoding.INSTANCE) { + utf8String = str.strDup(context.runtime); + utf8String.setEncoding(UTF8Encoding.INSTANCE); + switch (utf8String.getCodeRange()) { + case StringSupport.CR_7BIT: + return utf8String; + case StringSupport.CR_VALID: + // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. 
+ // TODO: Raise in 3.0.0 + context.runtime.getWarnings().warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); + return utf8String; + } + } + + str = (RubyString) str.encode(context, context.runtime.getEncodingService().convertEncodingToRubyEncoding(UTF8Encoding.INSTANCE)); + } + return str; + } + void encode(ThreadContext context, ByteList src, OutputStream out) throws IOException { while (hasNext()) { handleChar(readUtf8Char(context)); From 38c7831d9d6b7056e0b206ee62e453ce3076122b Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 8 Jan 2025 21:41:58 -0600 Subject: [PATCH 04/24] Port convert_UTF8_to_JSON from C Also includes updated logic for generate (generate_json_string) based on current C code. Original code by @byroot See #620 --- java/src/json/ext/StringEncoder.java | 106 ++++++++++++++++++++++++--- 1 file changed, 95 insertions(+), 11 deletions(-) diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index ab45a145..6c5d8279 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; /** * An encoder that reads from the given source and outputs its representation @@ -46,6 +47,15 @@ final class StringEncoder extends ByteListTranscoder { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\' 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; static final byte[] 
ASCII_ONLY_ESCAPE_TABLE = { @@ -97,6 +107,8 @@ final class StringEncoder extends ByteListTranscoder { //First byte of a 4+ byte code point 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, }; + private static final byte[] BACKSLASH_U2028 = "\\u2028".getBytes(StandardCharsets.US_ASCII); + private static final byte[] BACKSLASH_U2029 = "\\u2029".getBytes(StandardCharsets.US_ASCII); private final boolean asciiOnly, scriptSafe; @@ -143,10 +155,12 @@ void generate(ThreadContext context, RubyString object, OutputStream buffer) thr append('"'); switch (object.scanForCodeRange()) { case StringSupport.CR_7BIT: - encodeASCII(context, byteList, buffer); - break; case StringSupport.CR_VALID: - encode(context, byteList, buffer); + if (asciiOnly) { + encodeASCII(byteList, scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ASCII_ONLY_ESCAPE_TABLE); + } else { + encode(byteList, scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ESCAPE_TABLE); + } break; default: throw Utils.buildGeneratorError(context, object, "source sequence is illegal/malformed utf-8").toThrowable(); @@ -178,15 +192,85 @@ static RubyString ensureValidEncoding(ThreadContext context, RubyString str) { return str; } - void encode(ThreadContext context, ByteList src, OutputStream out) throws IOException { - while (hasNext()) { - handleChar(readUtf8Char(context)); + // C: convert_UTF8_to_JSON + void encode(ByteList src, byte[] escape_table) throws IOException { + byte[] hexdig = HEX; + byte[] scratch = aux; + + byte[] ptrBytes = src.unsafeBytes(); + int ptr = src.begin(); + int len = src.realSize(); + + int beg = 0; + int pos = 0; + + while (pos < len) { + int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]); + int ch_len = escape_table[ch]; + /* JSON encoding */ + + if (ch_len > 0) { + switch (ch_len) { + case 9: { + beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1); + switch (ch) { + case '"': appendEscape(BACKSLASH_DOUBLEQUOTE); break; + case '\\': appendEscape(BACKSLASH_BACKSLASH); break; + case '/': appendEscape(BACKSLASH_FORWARDSLASH); 
break; + case '\b': appendEscape(BACKSLASH_B); break; + case '\f': appendEscape(BACKSLASH_F); break; + case '\n': appendEscape(BACKSLASH_N); break; + case '\r': appendEscape(BACKSLASH_R); break; + case '\t': appendEscape(BACKSLASH_T); break; + default: { + scratch[2] = '0'; + scratch[3] = '0'; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + append(scratch, 0, 6); + break; + } + } + break; + } + case 11: { + int b2 = Byte.toUnsignedInt(ptrBytes[ptr + pos + 1]); + if (b2 == 0x80) { + int b3 = Byte.toUnsignedInt(ptrBytes[ptr + pos + 2]); + if (b3 == 0xA8) { + beg = pos = flushPos(pos, beg, ptrBytes, ptr, 3); + append(BACKSLASH_U2028, 0, 6); + break; + } else if (b3 == 0xA9) { + beg = pos = flushPos(pos, beg, ptrBytes, ptr, 3); + append(BACKSLASH_U2029, 0, 6); + break; + } + } + ch_len = 3; + // fallthrough + } + default: + pos += ch_len; + break; + } + } else { + pos++; + } } + + if (beg < len) { + append(ptrBytes, ptr + beg, len - beg); + } + } + + private int flushPos(int pos, int beg, byte[] ptrBytes, int ptr, int size) throws IOException { + if (pos > beg) { append(ptrBytes, ptr + beg, pos - beg); } + return pos + size; } // C: convert_UTF8_to_ASCII_only_JSON - void encodeASCII(ThreadContext context, ByteList src, OutputStream out) throws IOException { - byte[] escape_table = scriptSafe ? 
SCRIPT_SAFE_ESCAPE_TABLE : ASCII_ONLY_ESCAPE_TABLE; + void encodeASCII(ByteList src, byte[] escape_table) throws IOException { byte[] hexdig = HEX; byte[] scratch = aux; @@ -198,13 +282,13 @@ void encodeASCII(ThreadContext context, ByteList src, OutputStream out) throws I int pos = 0; while (pos < len) { - byte ch = ptrBytes[ptr + pos]; + int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]); int ch_len = escape_table[ch]; if (ch_len != 0) { switch (ch_len) { case 9: { - if (pos > beg) { append(ptrBytes, ptr + beg, pos - beg); } pos += 1; beg = pos; // FLUSH_POS + beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1); switch (ch) { case '"': appendEscape(BACKSLASH_DOUBLEQUOTE); break; case '\\': appendEscape(BACKSLASH_BACKSLASH); break; @@ -245,7 +329,7 @@ void encodeASCII(ThreadContext context, ByteList src, OutputStream out) throws I wchar = (wchar << 6) | (ptrBytes[ptr + pos +i] & 0x3F); } - if (pos > beg) { append(ptrBytes, ptr + beg, pos - beg); } pos += ch_len; beg = pos; // FLUSH_POS + beg = pos = flushPos(pos, beg, ptrBytes, ptr, ch_len); if (wchar <= 0xFFFF) { scratch[2] = hexdig[wchar >> 12]; From 0a5f6e7f551afd0f60852c6f1be51750bf7ff077 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 8 Jan 2025 22:57:20 -0600 Subject: [PATCH 05/24] Use external iteration to reduce alloc Lots of surrounding state so just take the hit of a Set and Iterator rather than a big visitor object. 
--- java/src/json/ext/Generator.java | 102 ++++++++++++++++--------------- 1 file changed, 53 insertions(+), 49 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 457e0a5d..dd1e9f54 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -35,6 +35,7 @@ import java.io.IOException; import java.io.OutputStream; import java.math.BigInteger; +import java.util.Set; import static java.nio.charset.StandardCharsets.*; @@ -359,56 +360,13 @@ void generate(ThreadContext context, final Session session, RubyHash object, fin buffer.write((byte)'{'); buffer.write(objectNLBytes); - final boolean[] firstPair = new boolean[]{true}; - object.visitAll(context, new RubyHash.VisitorWithState() { - @Override - public void visit(ThreadContext context, RubyHash self, IRubyObject key, IRubyObject value, int index, boolean[] firstPair) { - try { - if (firstPair[0]) { - firstPair[0] = false; - } else { - buffer.write((byte) ','); - buffer.write(objectNLBytes); - } - if (!objectNl.isEmpty()) buffer.write(indent); - - Ruby runtime = context.runtime; - - IRubyObject keyStr; - RubyClass keyClass = key.getType(); - if (key instanceof RubyString) { - if (keyClass == runtime.getString()) { - keyStr = key; - } else { - keyStr = key.callMethod(context, "to_s"); - } - } else if (keyClass == runtime.getSymbol()) { - keyStr = key.asString(); - } else { - keyStr = TypeConverter.convertToType(key, runtime.getString(), "to_s"); - } - - if (keyStr.getMetaClass() == runtime.getString()) { - STRING_HANDLER.generate(context, session, (RubyString) keyStr, buffer); - } else { - Utils.ensureString(keyStr); - Handler keyHandler = getHandlerFor(runtime, keyStr); - keyHandler.generate(context, session, keyStr, buffer); - } - - buffer.write(spaceBefore.unsafeBytes()); - buffer.write((byte) ':'); - buffer.write(space.unsafeBytes()); - - Handler valueHandler = getHandlerFor(runtime, value); - valueHandler.generate(context, session, value, 
buffer); - } catch (Throwable t) { - Helpers.throwException(t); - } - } - }, firstPair); + boolean firstPair = true; + for (RubyHash.RubyHashEntry entry : (Set) object.directEntrySet()) { + processEntry(context, session, buffer, entry, firstPair, objectNl, indent, spaceBefore, space); + firstPair = false; + } state.decreaseDepth(); - if (!firstPair[0] && !objectNl.isEmpty()) { + if (!firstPair && !objectNl.isEmpty()) { buffer.write(objectNLBytes); } buffer.write(Utils.repeat(state.getIndent(), state.getDepth())); @@ -416,6 +374,52 @@ public void visit(ThreadContext context, RubyHash self, IRubyObject key, IRubyOb } }; + private static void processEntry(ThreadContext context, Session session, OutputStream buffer, RubyHash.RubyHashEntry entry, boolean firstPair, ByteList objectNl, byte[] indent, ByteList spaceBefore, ByteList space) { + IRubyObject key = (IRubyObject) entry.getKey(); + IRubyObject value = (IRubyObject) entry.getValue(); + + try { + if (!firstPair) { + buffer.write((byte) ','); + buffer.write(objectNl.unsafeBytes()); + } + if (!objectNl.isEmpty()) buffer.write(indent); + + Ruby runtime = context.runtime; + + IRubyObject keyStr; + RubyClass keyClass = key.getType(); + if (key instanceof RubyString) { + if (keyClass == runtime.getString()) { + keyStr = key; + } else { + keyStr = key.callMethod(context, "to_s"); + } + } else if (keyClass == runtime.getSymbol()) { + keyStr = key.asString(); + } else { + keyStr = TypeConverter.convertToType(key, runtime.getString(), "to_s"); + } + + if (keyStr.getMetaClass() == runtime.getString()) { + STRING_HANDLER.generate(context, session, (RubyString) keyStr, buffer); + } else { + Utils.ensureString(keyStr); + Handler keyHandler = getHandlerFor(runtime, keyStr); + keyHandler.generate(context, session, keyStr, buffer); + } + + buffer.write(spaceBefore.unsafeBytes()); + buffer.write((byte) ':'); + buffer.write(space.unsafeBytes()); + + Handler valueHandler = getHandlerFor(runtime, value); + 
valueHandler.generate(context, session, value, buffer); + } catch (Throwable t) { + Helpers.throwException(t); + } + } + static final Handler STRING_HANDLER = new Handler() { @Override From 98cb7859c71a78a9ee2557f29fccb69d83e1acac Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 8 Jan 2025 23:42:22 -0600 Subject: [PATCH 06/24] Remove unused imports --- java/src/json/ext/Generator.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index dd1e9f54..3fca0fe8 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -5,9 +5,6 @@ */ package json.ext; -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.USASCIIEncoding; import org.jcodings.specific.UTF8Encoding; import org.jruby.Ruby; import org.jruby.RubyArray; @@ -19,16 +16,12 @@ import org.jruby.RubyFloat; import org.jruby.RubyHash; import org.jruby.RubyString; -import org.jruby.RubySymbol; -import org.jruby.RubyException; import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; -import org.jruby.exceptions.RaiseException; import org.jruby.util.ConvertBytes; import org.jruby.util.IOOutputStream; -import org.jruby.util.StringSupport; import org.jruby.util.TypeConverter; import java.io.BufferedOutputStream; From 75cf6fed2405b9f12157899aa50c6eeabea1dbe3 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 8 Jan 2025 23:45:15 -0600 Subject: [PATCH 07/24] Inline ConvertBytes logic for long to byte[] This change duplicates some code from JRuby to allow rendering the fixnum value to a shared byte array rather than allocating new for each value. Since fixnum dumping is a leaf operation, only one is needed per session. 
--- java/src/json/ext/Generator.java | 55 ++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 3fca0fe8..bd983813 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -20,7 +20,6 @@ import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; -import org.jruby.util.ConvertBytes; import org.jruby.util.IOOutputStream; import org.jruby.util.TypeConverter; @@ -132,6 +131,7 @@ static class Session { private IRubyObject possibleState; private RuntimeInfo info; private StringEncoder stringEncoder; + private byte[] charBytes; Session(GeneratorState state) { this.state = state; @@ -154,6 +154,11 @@ public RuntimeInfo getInfo(ThreadContext context) { return info; } + public byte[] getCharBytes() { + if (charBytes == null) charBytes = new byte[Long.toString(Long.MIN_VALUE).length()]; + return charBytes; + } + public StringEncoder getStringEncoder(ThreadContext context) { if (stringEncoder == null) { GeneratorState state = getState(context); @@ -237,10 +242,56 @@ void generate(ThreadContext context, Session session, RubyBignum object, OutputS new Handler() { @Override void generate(ThreadContext context, Session session, RubyFixnum object, OutputStream buffer) throws IOException { - buffer.write(ConvertBytes.longToCharBytes(object.getLongValue())); + long i = object.getLongValue(); + if (i == 0) { + buffer.write(ZERO_BYTES); + } else if (i == Long.MIN_VALUE) { + buffer.write(MIN_VALUE_BYTES_RADIX_10); + } else { + boolean neg = i < 0; + if (neg) i = -i; + int newSize = sizeWithDecimalString(i, neg, 0); + byte[] charBytes = session.getCharBytes(); + writeDecimalDigitsToArray(charBytes, i, neg, 0, 0, newSize); + buffer.write(charBytes, 0, newSize); + } } }; + private static final byte[] ZERO_BYTES = new byte[] {(byte)'0'}; + private static final byte[] 
MIN_VALUE_BYTES_RADIX_10; + + static { + MIN_VALUE_BYTES_RADIX_10 = ByteList.plain(Long.toString(Long.MIN_VALUE, 10)); + } + + private static int sizeWithDecimalString(long i, boolean neg, int baseSize) { + int count = 0; + while (i > 9) { + i /= 10; + count++; + } + int newSize = baseSize + count + 1; + + if (neg) newSize++; + + return newSize; + } + + private static void writeDecimalDigitsToArray(byte[] bytes, long i, boolean negative, int begin, int originalSize, int newSize) { + // write digits directly into the prepared byte array + for (int n = newSize - 1; i > 0; n--) { + bytes[begin + n] = decimalByteForDigit(i); + i /= 10; + } + + if (negative) bytes[originalSize] = '-'; + } + + private static byte decimalByteForDigit(long i) { + return (byte) (i % 10 + '0'); + } + static final Handler FLOAT_HANDLER = new Handler() { @Override From 8f4ce51c2264965f82c723807024fd2939d7efaa Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 8 Jan 2025 23:49:48 -0600 Subject: [PATCH 08/24] Eliminate * import --- java/src/json/ext/Generator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index bd983813..8bea0ba7 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -29,7 +29,7 @@ import java.math.BigInteger; import java.util.Set; -import static java.nio.charset.StandardCharsets.*; +import static java.nio.charset.StandardCharsets.UTF_8; public final class Generator { From 845fc46adaeaa307a305660fa2bc0e68f7771365 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 9 Jan 2025 00:24:32 -0600 Subject: [PATCH 09/24] Restructure handlers for easier profiling Anonymous classes show up as unnamed, numbered classes in profiles which makes them difficult to read. 
--- java/src/json/ext/Generator.java | 379 +++++++++++++++---------------- 1 file changed, 189 insertions(+), 190 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 8bea0ba7..15e3db21 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -229,34 +229,54 @@ void generate(ThreadContext context, Session session, T object, OutputStream buf /* Handlers */ - static final Handler BIGNUM_HANDLER = - new Handler() { - @Override - void generate(ThreadContext context, Session session, RubyBignum object, OutputStream buffer) throws IOException { - BigInteger bigInt = object.getValue(); - buffer.write(bigInt.toString().getBytes(UTF_8)); - } - }; - - static final Handler FIXNUM_HANDLER = - new Handler() { - @Override - void generate(ThreadContext context, Session session, RubyFixnum object, OutputStream buffer) throws IOException { - long i = object.getLongValue(); - if (i == 0) { - buffer.write(ZERO_BYTES); - } else if (i == Long.MIN_VALUE) { - buffer.write(MIN_VALUE_BYTES_RADIX_10); - } else { - boolean neg = i < 0; - if (neg) i = -i; - int newSize = sizeWithDecimalString(i, neg, 0); - byte[] charBytes = session.getCharBytes(); - writeDecimalDigitsToArray(charBytes, i, neg, 0, 0, newSize); - buffer.write(charBytes, 0, newSize); - } + static final Handler BIGNUM_HANDLER = new BignumHandler(); + static final Handler FIXNUM_HANDLER = new FixnumHandler(); + static final Handler FLOAT_HANDLER = new FloatHandler(); + static final Handler> ARRAY_HANDLER = new ArrayHandler(); + static final Handler HASH_HANDLER = new HashHandler(); + static final Handler STRING_HANDLER = new StringHandler(); + static final Handler TRUE_HANDLER = new KeywordHandler<>("true"); + static final Handler FALSE_HANDLER = new KeywordHandler<>("false"); + static final Handler NIL_HANDLER = new KeywordHandler<>("null"); + + /** + * The default handler (Object#to_json): coerces the object + * to string using #to_s, and serializes 
that string. + */ + static final Handler OBJECT_HANDLER = new ObjectHandler(); + + /** + * A handler that simply calls #to_json(state) on the + * given object. + */ + static final Handler GENERIC_HANDLER = new GenericHandler(); + + private static class BignumHandler extends Handler { + @Override + void generate(ThreadContext context, Session session, RubyBignum object, OutputStream buffer) throws IOException { + BigInteger bigInt = object.getValue(); + buffer.write(bigInt.toString().getBytes(UTF_8)); + } + } + + private static class FixnumHandler extends Handler { + @Override + void generate(ThreadContext context, Session session, RubyFixnum object, OutputStream buffer) throws IOException { + long i = object.getLongValue(); + if (i == 0) { + buffer.write(ZERO_BYTES); + } else if (i == Long.MIN_VALUE) { + buffer.write(MIN_VALUE_BYTES_RADIX_10); + } else { + boolean neg = i < 0; + if (neg) i = -i; + int newSize = sizeWithDecimalString(i, neg, 0); + byte[] charBytes = session.getCharBytes(); + writeDecimalDigitsToArray(charBytes, i, neg, 0, 0, newSize); + buffer.write(charBytes, 0, newSize); } - }; + } + } private static final byte[] ZERO_BYTES = new byte[] {(byte)'0'}; private static final byte[] MIN_VALUE_BYTES_RADIX_10; @@ -292,131 +312,128 @@ private static byte decimalByteForDigit(long i) { return (byte) (i % 10 + '0'); } - static final Handler FLOAT_HANDLER = - new Handler() { - @Override - void generate(ThreadContext context, Session session, RubyFloat object, OutputStream buffer) throws IOException { - double value = object.getValue(); + private static class FloatHandler extends Handler { + @Override + void generate(ThreadContext context, Session session, RubyFloat object, OutputStream buffer) throws IOException { + double value = object.getValue(); - if (Double.isInfinite(value) || Double.isNaN(value)) { - if (!session.getState(context).allowNaN()) { - throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); - } + if 
(Double.isInfinite(value) || Double.isNaN(value)) { + if (!session.getState(context).allowNaN()) { + throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); } - - buffer.write(Double.toString(value).getBytes(UTF_8)); } - }; + + buffer.write(Double.toString(value).getBytes(UTF_8)); + } + } private static final byte[] EMPTY_ARRAY_BYTES = "[]".getBytes(); - static final Handler> ARRAY_HANDLER = - new Handler>() { - @Override - int guessSize(ThreadContext context, Session session, RubyArray object) { - GeneratorState state = session.getState(context); - int depth = state.getDepth(); - int perItem = + private static class ArrayHandler extends Handler> { + @Override + int guessSize(ThreadContext context, Session session, RubyArray object) { + GeneratorState state = session.getState(context); + int depth = state.getDepth(); + int perItem = 4 // prealloc - + (depth + 1) * state.getIndent().length() // indent - + 1 + state.getArrayNl().length(); // ',' arrayNl - return 2 + object.size() * perItem; - } + + (depth + 1) * state.getIndent().length() // indent + + 1 + state.getArrayNl().length(); // ',' arrayNl + return 2 + object.size() * perItem; + } - @Override - void generate(ThreadContext context, Session session, RubyArray object, OutputStream buffer) throws IOException { - GeneratorState state = session.getState(context); - int depth = state.increaseDepth(context); + @Override + void generate(ThreadContext context, Session session, RubyArray object, OutputStream buffer) throws IOException { + GeneratorState state = session.getState(context); + int depth = state.increaseDepth(context); - if (object.isEmpty()) { - buffer.write(EMPTY_ARRAY_BYTES); - state.decreaseDepth(); - return; - } + if (object.isEmpty()) { + buffer.write(EMPTY_ARRAY_BYTES); + state.decreaseDepth(); + return; + } - Ruby runtime = context.runtime; + Ruby runtime = context.runtime; - ByteList indentUnit = state.getIndent(); - byte[] shift = Utils.repeat(indentUnit, 
depth); + ByteList indentUnit = state.getIndent(); + byte[] shift = Utils.repeat(indentUnit, depth); - ByteList arrayNl = state.getArrayNl(); - byte[] delim = new byte[1 + arrayNl.length()]; - delim[0] = ','; - System.arraycopy(arrayNl.unsafeBytes(), arrayNl.begin(), delim, 1, - arrayNl.length()); + ByteList arrayNl = state.getArrayNl(); + byte[] delim = new byte[1 + arrayNl.length()]; + delim[0] = ','; + System.arraycopy(arrayNl.unsafeBytes(), arrayNl.begin(), delim, 1, + arrayNl.length()); - buffer.write((byte)'['); - buffer.write(arrayNl.bytes()); - boolean firstItem = true; - - for (int i = 0, t = object.getLength(); i < t; i++) { - IRubyObject element = object.eltInternal(i); - if (firstItem) { - firstItem = false; - } else { - buffer.write(delim); - } - buffer.write(shift); - Handler handler = getHandlerFor(runtime, element); - handler.generate(context, session, element, buffer); - } + buffer.write((byte)'['); + buffer.write(arrayNl.bytes()); + boolean firstItem = true; - state.decreaseDepth(); - if (!arrayNl.isEmpty()) { - buffer.write(arrayNl.bytes()); - buffer.write(shift, 0, state.getDepth() * indentUnit.length()); + for (int i = 0, t = object.getLength(); i < t; i++) { + IRubyObject element = object.eltInternal(i); + if (firstItem) { + firstItem = false; + } else { + buffer.write(delim); } + buffer.write(shift); + Handler handler = getHandlerFor(runtime, element); + handler.generate(context, session, element, buffer); + } - buffer.write((byte)']'); + state.decreaseDepth(); + if (!arrayNl.isEmpty()) { + buffer.write(arrayNl.bytes()); + buffer.write(shift, 0, state.getDepth() * indentUnit.length()); } - }; + + buffer.write((byte)']'); + } + } private static final byte[] EMPTY_HASH_BYTES = "{}".getBytes(); - static final Handler HASH_HANDLER = - new Handler() { - @Override - int guessSize(ThreadContext context, Session session, RubyHash object) { - GeneratorState state = session.getState(context); - int perItem = + private static class HashHandler extends 
Handler { + @Override + int guessSize(ThreadContext context, Session session, RubyHash object) { + GeneratorState state = session.getState(context); + int perItem = 12 // key, colon, comma - + (state.getDepth() + 1) * state.getIndent().length() - + state.getSpaceBefore().length() - + state.getSpace().length(); - return 2 + object.size() * perItem; - } + + (state.getDepth() + 1) * state.getIndent().length() + + state.getSpaceBefore().length() + + state.getSpace().length(); + return 2 + object.size() * perItem; + } - @Override - void generate(ThreadContext context, final Session session, RubyHash object, final OutputStream buffer) throws IOException { - final GeneratorState state = session.getState(context); - final int depth = state.increaseDepth(context); + @Override + void generate(ThreadContext context, final Session session, RubyHash object, final OutputStream buffer) throws IOException { + final GeneratorState state = session.getState(context); + final int depth = state.increaseDepth(context); - if (object.isEmpty()) { - buffer.write(EMPTY_HASH_BYTES); - state.decreaseDepth(); - return; - } + if (object.isEmpty()) { + buffer.write(EMPTY_HASH_BYTES); + state.decreaseDepth(); + return; + } - final ByteList objectNl = state.getObjectNl(); - byte[] objectNLBytes = objectNl.unsafeBytes(); - final byte[] indent = Utils.repeat(state.getIndent(), depth); - final ByteList spaceBefore = state.getSpaceBefore(); - final ByteList space = state.getSpace(); + final ByteList objectNl = state.getObjectNl(); + byte[] objectNLBytes = objectNl.unsafeBytes(); + final byte[] indent = Utils.repeat(state.getIndent(), depth); + final ByteList spaceBefore = state.getSpaceBefore(); + final ByteList space = state.getSpace(); - buffer.write((byte)'{'); - buffer.write(objectNLBytes); + buffer.write((byte)'{'); + buffer.write(objectNLBytes); - boolean firstPair = true; - for (RubyHash.RubyHashEntry entry : (Set) object.directEntrySet()) { - processEntry(context, session, buffer, entry, 
firstPair, objectNl, indent, spaceBefore, space); - firstPair = false; - } - state.decreaseDepth(); - if (!firstPair && !objectNl.isEmpty()) { - buffer.write(objectNLBytes); - } - buffer.write(Utils.repeat(state.getIndent(), state.getDepth())); - buffer.write((byte)'}'); + boolean firstPair = true; + for (RubyHash.RubyHashEntry entry : (Set) object.directEntrySet()) { + processEntry(context, session, buffer, entry, firstPair, objectNl, indent, spaceBefore, space); + firstPair = false; } - }; + state.decreaseDepth(); + if (!firstPair && !objectNl.isEmpty()) { + buffer.write(objectNLBytes); + } + buffer.write(Utils.repeat(state.getIndent(), state.getDepth())); + buffer.write((byte)'}'); + } + } private static void processEntry(ThreadContext context, Session session, OutputStream buffer, RubyHash.RubyHashEntry entry, boolean firstPair, ByteList objectNl, byte[] indent, ByteList spaceBefore, ByteList space) { IRubyObject key = (IRubyObject) entry.getKey(); @@ -464,73 +481,55 @@ private static void processEntry(ThreadContext context, Session session, OutputS } } - static final Handler STRING_HANDLER = - new Handler() { - @Override - int guessSize(ThreadContext context, Session session, RubyString object) { - // for most applications, most strings will be just a set of - // printable ASCII characters without any escaping, so let's - // just allocate enough space for that + the quotes - return 2 + object.getByteList().length(); - } - - @Override - void generate(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException { - session.getStringEncoder(context).generate(context, object, buffer); - } - }; + private static class StringHandler extends Handler { + @Override + int guessSize(ThreadContext context, Session session, RubyString object) { + // for most applications, most strings will be just a set of + // printable ASCII characters without any escaping, so let's + // just allocate enough space for that + the quotes + return 2 + 
object.getByteList().length(); + } - static final Handler TRUE_HANDLER = - new KeywordHandler<>("true"); - static final Handler FALSE_HANDLER = - new KeywordHandler<>("false"); - static final Handler NIL_HANDLER = - new KeywordHandler<>("null"); + @Override + void generate(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException { + session.getStringEncoder(context).generate(context, object, buffer); + } + } - /** - * The default handler (Object#to_json): coerces the object - * to string using #to_s, and serializes that string. - */ - static final Handler OBJECT_HANDLER = - new Handler() { - @Override - RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { - RubyString str = object.asString(); - return STRING_HANDLER.generateNew(context, session, str); - } + private static class ObjectHandler extends Handler { + @Override + RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { + RubyString str = object.asString(); + return STRING_HANDLER.generateNew(context, session, str); + } - @Override - void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { - RubyString str = object.asString(); - STRING_HANDLER.generate(context, session, str, buffer); - } - }; + @Override + void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + RubyString str = object.asString(); + STRING_HANDLER.generate(context, session, str, buffer); + } + } - /** - * A handler that simply calls #to_json(state) on the - * given object. 
- */ - static final Handler GENERIC_HANDLER = - new Handler() { - @Override - RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { - GeneratorState state = session.getState(context); - if (state.strict()) { - throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); - } else if (object.respondsTo("to_json")) { - IRubyObject result = object.callMethod(context, "to_json", state); - if (result instanceof RubyString) return (RubyString)result; - throw context.runtime.newTypeError("to_json must return a String"); - } else { - return OBJECT_HANDLER.generateNew(context, session, object); - } + private static class GenericHandler extends Handler { + @Override + RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { + GeneratorState state = session.getState(context); + if (state.strict()) { + throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); + } else if (object.respondsTo("to_json")) { + IRubyObject result = object.callMethod(context, "to_json", state); + if (result instanceof RubyString) return (RubyString)result; + throw context.runtime.newTypeError("to_json must return a String"); + } else { + return OBJECT_HANDLER.generateNew(context, session, object); } + } - @Override - void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { - RubyString result = generateNew(context, session, object); - ByteList bytes = result.getByteList(); - buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); - } - }; + @Override + void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + RubyString result = generateNew(context, session, object); + ByteList bytes = result.getByteList(); + buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); + } + } } From 9d74a1f79d5d0f176a80f398c9479b0eeb26c7f2 Mon Sep 17 
00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 9 Jan 2025 00:44:42 -0600 Subject: [PATCH 10/24] Avoid allocation when writing Array delimiters Rather than allocating a buffer to hold N copies of arrayNL, just write it N times. We're buffering into a stream anyway. This makes array dumping zero-alloc other than buffer growth. --- java/src/json/ext/Generator.java | 21 +++++++++------------ java/src/json/ext/Utils.java | 9 +++++++++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 15e3db21..84c87818 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -354,16 +354,10 @@ void generate(ThreadContext context, Session session, RubyArray obj Ruby runtime = context.runtime; ByteList indentUnit = state.getIndent(); - byte[] shift = Utils.repeat(indentUnit, depth); - ByteList arrayNl = state.getArrayNl(); - byte[] delim = new byte[1 + arrayNl.length()]; - delim[0] = ','; - System.arraycopy(arrayNl.unsafeBytes(), arrayNl.begin(), delim, 1, - arrayNl.length()); - buffer.write((byte)'['); - buffer.write(arrayNl.bytes()); + buffer.write('['); + buffer.write(arrayNl.unsafeBytes(), arrayNl.begin(), arrayNl.realSize()); boolean firstItem = true; for (int i = 0, t = object.getLength(); i < t; i++) { @@ -371,9 +365,12 @@ void generate(ThreadContext context, Session session, RubyArray obj if (firstItem) { firstItem = false; } else { - buffer.write(delim); + buffer.write(','); + if (arrayNl.length() > 0) { + buffer.write(arrayNl.unsafeBytes(), arrayNl.begin(), arrayNl.length()); + } } - buffer.write(shift); + Utils.repeatWrite(buffer, indentUnit, depth); Handler handler = getHandlerFor(runtime, element); handler.generate(context, session, element, buffer); } @@ -381,10 +378,10 @@ void generate(ThreadContext context, Session session, RubyArray obj state.decreaseDepth(); if (!arrayNl.isEmpty()) { buffer.write(arrayNl.bytes()); - buffer.write(shift, 0, 
state.getDepth() * indentUnit.length()); + Utils.repeatWrite(buffer, indentUnit, state.getDepth()); } - buffer.write((byte)']'); + buffer.write((byte) ']'); } } diff --git a/java/src/json/ext/Utils.java b/java/src/json/ext/Utils.java index 87139cdb..3d2d4b4f 100644 --- a/java/src/json/ext/Utils.java +++ b/java/src/json/ext/Utils.java @@ -16,6 +16,9 @@ import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; +import java.io.IOException; +import java.io.OutputStream; + /** * Library of miscellaneous utility functions */ @@ -88,4 +91,10 @@ static byte[] repeat(byte[] a, int begin, int length, int n) { } return result; } + + static void repeatWrite(OutputStream out, ByteList a, int n) throws IOException { + for (int i = 0; i < n; i++) { + out.write(a.unsafeBytes(), a.begin(), a.length()); + } + } } From 92b24de11e5c3454dc9c06319ca6a00b795d0d22 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 9 Jan 2025 02:40:11 -0600 Subject: [PATCH 11/24] Move away from Handler abstraction Since there's a fixed number of types we have special dumping logic for, this abstraction just introduces overhead we don't need. This patch starts moving away from indirecting all dumps through the Handler abstraction and directly generating from the type switch. This also aligns better with the main loop of the C code and should inline and optimize better. 
--- java/src/json/ext/Generator.java | 284 +++++++++++++++++++------------ 1 file changed, 174 insertions(+), 110 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 84c87818..ba293eaf 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -112,6 +112,26 @@ private static Handler getHandlerFor(Ruby run return GENERIC_HANDLER; } + private static void generateFor(ThreadContext context, Session session, T object, OutputStream buffer) throws IOException { + switch (((RubyBasicObject) object).getNativeClassIndex()) { + case NIL : buffer.write(NULL_STRING); return; + case TRUE : buffer.write(TRUE_STRING); return; + case FALSE : buffer.write(FALSE_STRING); return; + case FLOAT : generateFloat(context, session, (RubyFloat) object, buffer); return; + case FIXNUM : generateFixnum(session, (RubyFixnum) object, buffer); return; + case BIGNUM : generateBignum((RubyBignum) object, buffer); return; + case STRING : + if (Helpers.metaclass(object) != context.runtime.getString()) break; + generateString(context, session, (RubyString) object, buffer); return; + case ARRAY : + if (Helpers.metaclass(object) != context.runtime.getArray()) break; + generateArray(context, session, (RubyArray) object, buffer); return; + case HASH : + if (Helpers.metaclass(object) != context.runtime.getHash()) break; + generateHash(context, session, (RubyHash) object, buffer); return; + } + generateGeneric(context, session, object, buffer); + } /* Generator context */ @@ -127,6 +147,7 @@ private static Handler getHandlerFor(Ruby run * won't be part of the session. 
*/ static class Session { + private static final int MAX_LONG_CHARS = Long.toString(Long.MIN_VALUE).length(); private GeneratorState state; private IRubyObject possibleState; private RuntimeInfo info; @@ -155,7 +176,8 @@ public RuntimeInfo getInfo(ThreadContext context) { } public byte[] getCharBytes() { - if (charBytes == null) charBytes = new byte[Long.toString(Long.MIN_VALUE).length()]; + byte[] charBytes = this.charBytes; + if (charBytes == null) charBytes = this.charBytes = new byte[MAX_LONG_CHARS]; return charBytes; } @@ -206,8 +228,8 @@ private static class KeywordHandler extends Handler { private final byte[] keyword; - private KeywordHandler(String keyword) { - this.keyword = keyword.getBytes(UTF_8); + private KeywordHandler(byte[] keyword) { + this.keyword = keyword; } @Override @@ -235,9 +257,12 @@ void generate(ThreadContext context, Session session, T object, OutputStream buf static final Handler> ARRAY_HANDLER = new ArrayHandler(); static final Handler HASH_HANDLER = new HashHandler(); static final Handler STRING_HANDLER = new StringHandler(); - static final Handler TRUE_HANDLER = new KeywordHandler<>("true"); - static final Handler FALSE_HANDLER = new KeywordHandler<>("false"); - static final Handler NIL_HANDLER = new KeywordHandler<>("null"); + private static final byte[] TRUE_STRING = "true".getBytes(); + static final Handler TRUE_HANDLER = new KeywordHandler<>(TRUE_STRING); + private static final byte[] FALSE_STRING = "false".getBytes(); + static final Handler FALSE_HANDLER = new KeywordHandler<>(FALSE_STRING); + private static final byte[] NULL_STRING = "null".getBytes(); + static final Handler NIL_HANDLER = new KeywordHandler<>(NULL_STRING); /** * The default handler (Object#to_json): coerces the object @@ -254,27 +279,35 @@ void generate(ThreadContext context, Session session, T object, OutputStream buf private static class BignumHandler extends Handler { @Override void generate(ThreadContext context, Session session, RubyBignum object, 
OutputStream buffer) throws IOException { - BigInteger bigInt = object.getValue(); - buffer.write(bigInt.toString().getBytes(UTF_8)); + generateBignum(object, buffer); } } + private static void generateBignum(RubyBignum object, OutputStream buffer) throws IOException { + BigInteger bigInt = object.getValue(); + buffer.write(bigInt.toString().getBytes(UTF_8)); + } + private static class FixnumHandler extends Handler { @Override void generate(ThreadContext context, Session session, RubyFixnum object, OutputStream buffer) throws IOException { - long i = object.getLongValue(); - if (i == 0) { - buffer.write(ZERO_BYTES); - } else if (i == Long.MIN_VALUE) { - buffer.write(MIN_VALUE_BYTES_RADIX_10); - } else { - boolean neg = i < 0; - if (neg) i = -i; - int newSize = sizeWithDecimalString(i, neg, 0); - byte[] charBytes = session.getCharBytes(); - writeDecimalDigitsToArray(charBytes, i, neg, 0, 0, newSize); - buffer.write(charBytes, 0, newSize); - } + generateFixnum(session, object, buffer); + } + } + + private static void generateFixnum(Session session, RubyFixnum object, OutputStream buffer) throws IOException { + long i = object.getLongValue(); + if (i == 0) { + buffer.write(ZERO_BYTES); + } else if (i == Long.MIN_VALUE) { + buffer.write(MIN_VALUE_BYTES_RADIX_10); + } else { + boolean neg = i < 0; + if (neg) i = -i; + int newSize = sizeWithDecimalString(i, neg, 0); + byte[] charBytes = session.getCharBytes(); + writeDecimalDigitsToArray(charBytes, i, neg, 0, 0, newSize); + buffer.write(charBytes, 0, newSize); } } @@ -315,16 +348,20 @@ private static byte decimalByteForDigit(long i) { private static class FloatHandler extends Handler { @Override void generate(ThreadContext context, Session session, RubyFloat object, OutputStream buffer) throws IOException { - double value = object.getValue(); + generateFloat(context, session, object, buffer); + } + } - if (Double.isInfinite(value) || Double.isNaN(value)) { - if (!session.getState(context).allowNaN()) { - throw 
Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); - } - } + private static void generateFloat(ThreadContext context, Session session, RubyFloat object, OutputStream buffer) throws IOException { + double value = object.getValue(); - buffer.write(Double.toString(value).getBytes(UTF_8)); + if (Double.isInfinite(value) || Double.isNaN(value)) { + if (!session.getState(context).allowNaN()) { + throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); + } } + + buffer.write(Double.toString(value).getBytes(UTF_8)); } private static final byte[] EMPTY_ARRAY_BYTES = "[]".getBytes(); @@ -342,47 +379,53 @@ int guessSize(ThreadContext context, Session session, RubyArray obj @Override void generate(ThreadContext context, Session session, RubyArray object, OutputStream buffer) throws IOException { - GeneratorState state = session.getState(context); - int depth = state.increaseDepth(context); - - if (object.isEmpty()) { - buffer.write(EMPTY_ARRAY_BYTES); - state.decreaseDepth(); - return; - } - - Ruby runtime = context.runtime; + generateArray(context, session, object, buffer); + } + } - ByteList indentUnit = state.getIndent(); - ByteList arrayNl = state.getArrayNl(); + private static void generateArray(ThreadContext context, Session session, RubyArray object, OutputStream buffer) throws IOException { + GeneratorState state = session.getState(context); + int depth = state.increaseDepth(context); - buffer.write('['); - buffer.write(arrayNl.unsafeBytes(), arrayNl.begin(), arrayNl.realSize()); - boolean firstItem = true; + if (object.isEmpty()) { + buffer.write(EMPTY_ARRAY_BYTES); + state.decreaseDepth(); + return; + } - for (int i = 0, t = object.getLength(); i < t; i++) { - IRubyObject element = object.eltInternal(i); - if (firstItem) { - firstItem = false; - } else { - buffer.write(','); - if (arrayNl.length() > 0) { - buffer.write(arrayNl.unsafeBytes(), arrayNl.begin(), arrayNl.length()); - } + 
ByteList indentUnit = state.getIndent(); + ByteList arrayNl = state.getArrayNl(); + byte[] arrayNLBytes = arrayNl.unsafeBytes(); + int arrayNLBegin = arrayNl.begin(); + int arrayNLSize = arrayNl.realSize(); + boolean arrayNLEmpty = arrayNLSize == 0; + + buffer.write('['); + buffer.write(arrayNLBytes, arrayNLBegin, arrayNLSize); + boolean firstItem = true; + + int length = object.getLength(); + for (int i = 0, t = length; i < t; i++) { + IRubyObject element = object.eltInternal(i); + if (firstItem) { + firstItem = false; + } else { + buffer.write(','); + if (!arrayNLEmpty) { + buffer.write(arrayNLBytes, arrayNLBegin, arrayNLSize); } - Utils.repeatWrite(buffer, indentUnit, depth); - Handler handler = getHandlerFor(runtime, element); - handler.generate(context, session, element, buffer); - } - - state.decreaseDepth(); - if (!arrayNl.isEmpty()) { - buffer.write(arrayNl.bytes()); - Utils.repeatWrite(buffer, indentUnit, state.getDepth()); } + Utils.repeatWrite(buffer, indentUnit, depth); + generateFor(context, session, element, buffer); + } - buffer.write((byte) ']'); + state.decreaseDepth(); + if (!arrayNLEmpty) { + buffer.write(arrayNLBytes, arrayNLBegin, arrayNLSize); + Utils.repeatWrite(buffer, indentUnit, state.getDepth()); } + + buffer.write((byte) ']'); } private static final byte[] EMPTY_HASH_BYTES = "{}".getBytes(); @@ -400,36 +443,40 @@ int guessSize(ThreadContext context, Session session, RubyHash object) { @Override void generate(ThreadContext context, final Session session, RubyHash object, final OutputStream buffer) throws IOException { - final GeneratorState state = session.getState(context); - final int depth = state.increaseDepth(context); + generateHash(context, session, object, buffer); + } + } - if (object.isEmpty()) { - buffer.write(EMPTY_HASH_BYTES); - state.decreaseDepth(); - return; - } + private static void generateHash(ThreadContext context, Session session, RubyHash object, OutputStream buffer) throws IOException { + final GeneratorState state 
= session.getState(context); + final int depth = state.increaseDepth(context); + + if (object.isEmpty()) { + buffer.write(EMPTY_HASH_BYTES); + state.decreaseDepth(); + return; + } - final ByteList objectNl = state.getObjectNl(); - byte[] objectNLBytes = objectNl.unsafeBytes(); - final byte[] indent = Utils.repeat(state.getIndent(), depth); - final ByteList spaceBefore = state.getSpaceBefore(); - final ByteList space = state.getSpace(); + final ByteList objectNl = state.getObjectNl(); + byte[] objectNLBytes = objectNl.unsafeBytes(); + final byte[] indent = Utils.repeat(state.getIndent(), depth); + final ByteList spaceBefore = state.getSpaceBefore(); + final ByteList space = state.getSpace(); - buffer.write((byte)'{'); - buffer.write(objectNLBytes); + buffer.write((byte)'{'); + buffer.write(objectNLBytes); - boolean firstPair = true; - for (RubyHash.RubyHashEntry entry : (Set) object.directEntrySet()) { - processEntry(context, session, buffer, entry, firstPair, objectNl, indent, spaceBefore, space); - firstPair = false; - } - state.decreaseDepth(); - if (!firstPair && !objectNl.isEmpty()) { - buffer.write(objectNLBytes); - } - buffer.write(Utils.repeat(state.getIndent(), state.getDepth())); - buffer.write((byte)'}'); + boolean firstPair = true; + for (RubyHash.RubyHashEntry entry : (Set) object.directEntrySet()) { + processEntry(context, session, buffer, entry, firstPair, objectNl, indent, spaceBefore, space); + firstPair = false; + } + state.decreaseDepth(); + if (!firstPair && !objectNl.isEmpty()) { + buffer.write(objectNLBytes); } + buffer.write(Utils.repeat(state.getIndent(), state.getDepth())); + buffer.write((byte)'}'); } private static void processEntry(ThreadContext context, Session session, OutputStream buffer, RubyHash.RubyHashEntry entry, boolean firstPair, ByteList objectNl, byte[] indent, ByteList spaceBefore, ByteList space) { @@ -460,19 +507,17 @@ private static void processEntry(ThreadContext context, Session session, OutputS } if 
(keyStr.getMetaClass() == runtime.getString()) { - STRING_HANDLER.generate(context, session, (RubyString) keyStr, buffer); + generateString(context, session, (RubyString) keyStr, buffer); } else { Utils.ensureString(keyStr); - Handler keyHandler = getHandlerFor(runtime, keyStr); - keyHandler.generate(context, session, keyStr, buffer); + generateFor(context, session, keyStr, buffer); } buffer.write(spaceBefore.unsafeBytes()); buffer.write((byte) ':'); buffer.write(space.unsafeBytes()); - Handler valueHandler = getHandlerFor(runtime, value); - valueHandler.generate(context, session, value, buffer); + generateFor(context, session, value, buffer); } catch (Throwable t) { Helpers.throwException(t); } @@ -489,44 +534,63 @@ int guessSize(ThreadContext context, Session session, RubyString object) { @Override void generate(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException { - session.getStringEncoder(context).generate(context, object, buffer); + generateString(context, session, object, buffer); } } + private static void generateString(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException { + session.getStringEncoder(context).generate(context, object, buffer); + } + private static class ObjectHandler extends Handler { @Override RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { - RubyString str = object.asString(); - return STRING_HANDLER.generateNew(context, session, str); + return generateObjectNew(context, session, object); } @Override void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { - RubyString str = object.asString(); - STRING_HANDLER.generate(context, session, str, buffer); + generateObject(context, session, object, buffer); } } + private static RubyString generateObjectNew(ThreadContext context, Session session, IRubyObject object) { + RubyString str = object.asString(); + 
return STRING_HANDLER.generateNew(context, session, str); + } + + private static void generateObject(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + generateString(context, session, object.asString(), buffer); + } + private static class GenericHandler extends Handler { @Override RubyString generateNew(ThreadContext context, Session session, IRubyObject object) { - GeneratorState state = session.getState(context); - if (state.strict()) { - throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); - } else if (object.respondsTo("to_json")) { - IRubyObject result = object.callMethod(context, "to_json", state); - if (result instanceof RubyString) return (RubyString)result; - throw context.runtime.newTypeError("to_json must return a String"); - } else { - return OBJECT_HANDLER.generateNew(context, session, object); - } + return generateGenericNew(context, session, object); } @Override void generate(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { - RubyString result = generateNew(context, session, object); - ByteList bytes = result.getByteList(); - buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); + generateGeneric(context, session, object, buffer); } } + + private static RubyString generateGenericNew(ThreadContext context, Session session, IRubyObject object) { + GeneratorState state = session.getState(context); + if (state.strict()) { + throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); + } else if (object.respondsTo("to_json")) { + IRubyObject result = object.callMethod(context, "to_json", state); + if (result instanceof RubyString) return (RubyString) result; + throw context.runtime.newTypeError("to_json must return a String"); + } else { + return OBJECT_HANDLER.generateNew(context, session, object); + } + } + + private static void 
generateGeneric(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + RubyString result = generateGenericNew(context, session, object); + ByteList bytes = result.getByteList(); + buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); + } } From f7eede39d1c097645dc42f91bcd1d1448e5e44a1 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 14 Jan 2025 21:18:03 -0600 Subject: [PATCH 12/24] Match C version of fbuffer_append_long --- java/src/json/ext/Generator.java | 40 +++++++++++--------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index ba293eaf..2100d34b 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -302,12 +302,8 @@ private static void generateFixnum(Session session, RubyFixnum object, OutputStr } else if (i == Long.MIN_VALUE) { buffer.write(MIN_VALUE_BYTES_RADIX_10); } else { - boolean neg = i < 0; - if (neg) i = -i; - int newSize = sizeWithDecimalString(i, neg, 0); byte[] charBytes = session.getCharBytes(); - writeDecimalDigitsToArray(charBytes, i, neg, 0, 0, newSize); - buffer.write(charBytes, 0, newSize); + appendFixnum(buffer, charBytes, i); } } @@ -318,32 +314,24 @@ private static void generateFixnum(Session session, RubyFixnum object, OutputStr MIN_VALUE_BYTES_RADIX_10 = ByteList.plain(Long.toString(Long.MIN_VALUE, 10)); } - private static int sizeWithDecimalString(long i, boolean neg, int baseSize) { - int count = 0; - while (i > 9) { - i /= 10; - count++; - } - int newSize = baseSize + count + 1; - - if (neg) newSize++; - - return newSize; + // C: fbuffer_append_long + static void appendFixnum(OutputStream buffer, byte[] buf, long number) throws IOException { + int buffer_end = buf.length; + int len = fltoa(number, buf, buffer_end - 1); + buffer.write(buf, buffer_end - len, len); } - private static void writeDecimalDigitsToArray(byte[] bytes, long 
i, boolean negative, int begin, int originalSize, int newSize) { - // write digits directly into the prepared byte array - for (int n = newSize - 1; i > 0; n--) { - bytes[begin + n] = decimalByteForDigit(i); - i /= 10; - } + static int fltoa(long number, byte[] buf, int end) { + long sign = number; + int tmp = end; - if (negative) bytes[originalSize] = '-'; + if (sign < 0) number = -number; + do buf[tmp--] = (byte) digits[(int) (number % 10)]; while ((number /= 10) != 0); + if (sign < 0) buf[tmp--] = '-'; + return end - tmp; } - private static byte decimalByteForDigit(long i) { - return (byte) (i % 10 + '0'); - } + private static final char[] digits = {'0', '1','2','3','4','5','6','7','8','9'}; private static class FloatHandler extends Handler { @Override From b11e4f2130b1f22559f6c91d4d0c3cdf4afa21d9 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 14 Jan 2025 21:18:31 -0600 Subject: [PATCH 13/24] Minor tweaks to reduce complexity --- java/src/json/ext/Generator.java | 17 +++++++---------- java/src/json/ext/GeneratorState.java | 4 ++-- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 2100d34b..3dfcf582 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -390,14 +390,11 @@ private static void generateArray(ThreadContext context, Session session, RubyAr buffer.write('['); buffer.write(arrayNLBytes, arrayNLBegin, arrayNLSize); - boolean firstItem = true; int length = object.getLength(); - for (int i = 0, t = length; i < t; i++) { + for (int i = 0; i < length; i++) { IRubyObject element = object.eltInternal(i); - if (firstItem) { - firstItem = false; - } else { + if (i > 0) { buffer.write(','); if (!arrayNLEmpty) { buffer.write(arrayNLBytes, arrayNLBegin, arrayNLSize); @@ -407,10 +404,10 @@ private static void generateArray(ThreadContext context, Session session, RubyAr generateFor(context, session, element, buffer); } - 
state.decreaseDepth(); + int oldDepth = state.decreaseDepth(); if (!arrayNLEmpty) { buffer.write(arrayNLBytes, arrayNLBegin, arrayNLSize); - Utils.repeatWrite(buffer, indentUnit, state.getDepth()); + Utils.repeatWrite(buffer, indentUnit, oldDepth); } buffer.write((byte) ']'); @@ -451,7 +448,7 @@ private static void generateHash(ThreadContext context, Session session, RubyHas final ByteList spaceBefore = state.getSpaceBefore(); final ByteList space = state.getSpace(); - buffer.write((byte)'{'); + buffer.write('{'); buffer.write(objectNLBytes); boolean firstPair = true; @@ -459,11 +456,11 @@ private static void generateHash(ThreadContext context, Session session, RubyHas processEntry(context, session, buffer, entry, firstPair, objectNl, indent, spaceBefore, space); firstPair = false; } - state.decreaseDepth(); + int oldDepth = state.decreaseDepth(); if (!firstPair && !objectNl.isEmpty()) { buffer.write(objectNLBytes); } - buffer.write(Utils.repeat(state.getIndent(), state.getDepth())); + Utils.repeatWrite(buffer, state.getIndent(), oldDepth); buffer.write((byte)'}'); } diff --git a/java/src/json/ext/GeneratorState.java b/java/src/json/ext/GeneratorState.java index fdd433c6..0ef042cc 100644 --- a/java/src/json/ext/GeneratorState.java +++ b/java/src/json/ext/GeneratorState.java @@ -536,8 +536,8 @@ public int increaseDepth(ThreadContext context) { return depth; } - public void decreaseDepth() { - --depth; + public int decreaseDepth() { + return --depth; } /** From 97ac36f6fce4a66233698c2cc6f3ee03c16bc811 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 14 Jan 2025 23:37:50 -0600 Subject: [PATCH 14/24] Reimpl byte[] stream without synchronization The byte[] output stream used here extended ByteArrayOutputStream from the JDK, which synchronizes all mutation operations (like writes). Since this is only going to be used once within a given call stack, it needs no synchronization. 
This change more than triples the performance of a benchmark of dumping an array of empty arrays and should increase performance of all dump forms. --- .../json/ext/ByteListDirectOutputStream.java | 48 +++++++++++++++++-- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/java/src/json/ext/ByteListDirectOutputStream.java b/java/src/json/ext/ByteListDirectOutputStream.java index 178cf11c..6635d341 100644 --- a/java/src/json/ext/ByteListDirectOutputStream.java +++ b/java/src/json/ext/ByteListDirectOutputStream.java @@ -3,14 +3,54 @@ import org.jcodings.Encoding; import org.jruby.util.ByteList; -import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; + +public class ByteListDirectOutputStream extends OutputStream { + private byte[] buffer; + private int length; -public class ByteListDirectOutputStream extends ByteArrayOutputStream { ByteListDirectOutputStream(int size) { - super(size); + buffer = new byte[size]; } public ByteList toByteListDirect(Encoding encoding) { - return new ByteList(buf, 0, count, encoding, false); + return new ByteList(buffer, 0, length, encoding, false); + } + + @Override + public void write(int b) throws IOException { + int myLength = this.length; + grow(this, buffer, myLength, 1); + buffer[length++] = (byte) b; + } + + @Override + public void write(byte[] bytes, int start, int length) throws IOException { + int myLength = this.length; + grow(this, buffer, myLength, length); + System.arraycopy(bytes, start, buffer, myLength, length); + this.length = myLength + length; + } + + @Override + public void write(byte[] bytes) throws IOException { + int myLength = this.length; + int moreLength = bytes.length; + grow(this, buffer, myLength, moreLength); + System.arraycopy(bytes, 0, buffer, myLength, moreLength); + this.length = myLength + moreLength; + } + + private static void grow(ByteListDirectOutputStream self, byte[] buffer, int myLength, int more) { + int newLength = 
myLength + more; + int myCapacity = buffer.length; + int diff = newLength - myCapacity; + if (diff > 0) { + // grow to double current length or capacity + diff, whichever is greater + int growBy = Math.max(myLength, diff); + self.buffer = Arrays.copyOf(self.buffer, myCapacity + growBy); + } } } From bd2007a3d23edca073623aeffe75a1eb620c4280 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 15 Jan 2025 11:16:44 -0600 Subject: [PATCH 15/24] Reduce overhead in repeats * Return incoming array if only one repeat is needed and array is exact size. * Only retrieve ByteList fields once for repeat writes. --- java/src/json/ext/Utils.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/java/src/json/ext/Utils.java b/java/src/json/ext/Utils.java index 3d2d4b4f..38491d2e 100644 --- a/java/src/json/ext/Utils.java +++ b/java/src/json/ext/Utils.java @@ -84,17 +84,25 @@ static byte[] repeat(ByteList a, int n) { static byte[] repeat(byte[] a, int begin, int length, int n) { if (length == 0) return ByteList.NULL_ARRAY; + + if (n == 1 && begin == 0 && length == a.length) return a; + int resultLen = length * n; byte[] result = new byte[resultLen]; for (int pos = 0; pos < resultLen; pos += length) { System.arraycopy(a, begin, result, pos, length); } + return result; } static void repeatWrite(OutputStream out, ByteList a, int n) throws IOException { + byte[] bytes = a.unsafeBytes(); + int begin = a.begin(); + int length = a.length(); + for (int i = 0; i < n; i++) { - out.write(a.unsafeBytes(), a.begin(), a.length()); + out.write(bytes, begin, length); } } } From 4f7d40442f6b04843659b5fbe14f0ccf5fdb6a1f Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 16 Jan 2025 14:32:31 -0600 Subject: [PATCH 16/24] Use equivalent of rb_sym2str --- java/src/json/ext/Generator.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 3dfcf582..fdd67fc7 100644 
--- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -16,6 +16,7 @@ import org.jruby.RubyFloat; import org.jruby.RubyHash; import org.jruby.RubyString; +import org.jruby.RubySymbol; import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; @@ -486,7 +487,7 @@ private static void processEntry(ThreadContext context, Session session, OutputS keyStr = key.callMethod(context, "to_s"); } } else if (keyClass == runtime.getSymbol()) { - keyStr = key.asString(); + keyStr = ((RubySymbol) key).id2name(context); } else { keyStr = TypeConverter.convertToType(key, runtime.getString(), "to_s"); } From 10d752bbec0b29cf52745460842cb69bb940dc7b Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 16 Jan 2025 15:10:39 -0600 Subject: [PATCH 17/24] Microoptimizations for ByteList stream --- .../json/ext/ByteListDirectOutputStream.java | 48 +++++++++++++------ 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/java/src/json/ext/ByteListDirectOutputStream.java b/java/src/json/ext/ByteListDirectOutputStream.java index 6635d341..b22d4812 100644 --- a/java/src/json/ext/ByteListDirectOutputStream.java +++ b/java/src/json/ext/ByteListDirectOutputStream.java @@ -21,36 +21,54 @@ public ByteList toByteListDirect(Encoding encoding) { @Override public void write(int b) throws IOException { - int myLength = this.length; - grow(this, buffer, myLength, 1); - buffer[length++] = (byte) b; + int currentLength = this.length; + int newLength = currentLength + 1; + byte[] buffer = ensureBuffer(this, newLength); + buffer[currentLength] = (byte) b; + this.length = newLength; } @Override public void write(byte[] bytes, int start, int length) throws IOException { - int myLength = this.length; - grow(this, buffer, myLength, length); - System.arraycopy(bytes, start, buffer, myLength, length); - this.length = myLength + length; + int currentLength = this.length; + int newLength = currentLength + 
length; + byte[] buffer = ensureBuffer(this, newLength); + System.arraycopy(bytes, start, buffer, currentLength, length); + this.length = newLength; } @Override public void write(byte[] bytes) throws IOException { int myLength = this.length; int moreLength = bytes.length; - grow(this, buffer, myLength, moreLength); + int newLength = myLength + moreLength; + byte[] buffer = ensureBuffer(this, newLength); System.arraycopy(bytes, 0, buffer, myLength, moreLength); - this.length = myLength + moreLength; + this.length = newLength; } - private static void grow(ByteListDirectOutputStream self, byte[] buffer, int myLength, int more) { - int newLength = myLength + more; + private static byte[] ensureBuffer(ByteListDirectOutputStream self, int minimumLength) { + byte[] buffer = self.buffer; int myCapacity = buffer.length; - int diff = newLength - myCapacity; + int diff = minimumLength - myCapacity; if (diff > 0) { - // grow to double current length or capacity + diff, whichever is greater - int growBy = Math.max(myLength, diff); - self.buffer = Arrays.copyOf(self.buffer, myCapacity + growBy); + buffer = self.buffer = grow(buffer, myCapacity, diff); + } + + return buffer; + } + + private static byte[] grow(byte[] oldBuffer, int myCapacity, int diff) { + // grow to double current buffer length or capacity + diff, whichever is greater + int newLength = myCapacity + Math.max(myCapacity, diff); + // check overflow + if (newLength < 0) { + // try just diff length in case it can fit + newLength = myCapacity + diff; + if (newLength < 0) { + throw new ArrayIndexOutOfBoundsException("cannot allocate array of size " + myCapacity + "+" + diff); + } } + return Arrays.copyOf(oldBuffer, newLength); } } From 39d410f411ad90fb4075d6615fe215f63d4eef84 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 16 Jan 2025 15:20:19 -0600 Subject: [PATCH 18/24] Cast to byte not necessary --- java/src/json/ext/Generator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index fdd67fc7..90c5ccfc 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -462,7 +462,7 @@ private static void generateHash(ThreadContext context, Session session, RubyHas buffer.write(objectNLBytes); } Utils.repeatWrite(buffer, state.getIndent(), oldDepth); - buffer.write((byte)'}'); + buffer.write('}'); } private static void processEntry(ThreadContext context, Session session, OutputStream buffer, RubyHash.RubyHashEntry entry, boolean firstPair, ByteList objectNl, byte[] indent, ByteList spaceBefore, ByteList space) { From 7f9b6a326d603b6b6fd43548732c8a7647da2b2a Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 16 Jan 2025 15:47:51 -0600 Subject: [PATCH 19/24] Refactor this for better inlining --- java/src/json/ext/StringEncoder.java | 48 ++++++++++++++++------------ 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index 6c5d8279..afa14e56 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -141,13 +141,7 @@ final class StringEncoder extends ByteListTranscoder { // C: generate_json_string void generate(ThreadContext context, RubyString object, OutputStream buffer) throws IOException { - try { - object = ensureValidEncoding(context, object); - } catch (RaiseException re) { - RubyException exc = Utils.buildGeneratorError(context, object, re.getMessage()); - exc.setCause(re.getException()); - throw exc.toThrowable(); - } + object = ensureValidEncoding(context, object); ByteList byteList = object.getByteList(); init(byteList); @@ -171,24 +165,38 @@ void generate(ThreadContext context, RubyString object, OutputStream buffer) thr static RubyString ensureValidEncoding(ThreadContext context, RubyString str) { Encoding encoding = str.getEncoding(); + + if (encoding == USASCIIEncoding.INSTANCE || encoding == 
UTF8Encoding.INSTANCE) { + return str; + } + + return tryWeirdEncodings(context, str, encoding); + } + + private static RubyString tryWeirdEncodings(ThreadContext context, RubyString str, Encoding encoding) { RubyString utf8String; - if (!(encoding == USASCIIEncoding.INSTANCE || encoding == UTF8Encoding.INSTANCE)) { - if (encoding == ASCIIEncoding.INSTANCE) { - utf8String = str.strDup(context.runtime); - utf8String.setEncoding(UTF8Encoding.INSTANCE); - switch (utf8String.getCodeRange()) { - case StringSupport.CR_7BIT: - return utf8String; - case StringSupport.CR_VALID: - // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. - // TODO: Raise in 3.0.0 - context.runtime.getWarnings().warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); - return utf8String; - } + if (encoding == ASCIIEncoding.INSTANCE) { + utf8String = str.strDup(context.runtime); + utf8String.setEncoding(UTF8Encoding.INSTANCE); + switch (utf8String.getCodeRange()) { + case StringSupport.CR_7BIT: + return utf8String; + case StringSupport.CR_VALID: + // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. 
+ // TODO: Raise in 3.0.0 + context.runtime.getWarnings().warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); + return utf8String; } + } + try { str = (RubyString) str.encode(context, context.runtime.getEncodingService().convertEncodingToRubyEncoding(UTF8Encoding.INSTANCE)); + } catch (RaiseException re) { + RubyException exc = Utils.buildGeneratorError(context, str, re.getMessage()); + exc.setCause(re.getException()); + throw exc.toThrowable(); } + return str; } From 70aadd0bea1f4c2c8289c2057953bf845c1a693b Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 16 Jan 2025 16:09:29 -0600 Subject: [PATCH 20/24] More tiny tweaks to reduce overhead of generateString --- java/src/json/ext/ByteListTranscoder.java | 4 +++- java/src/json/ext/StringEncoder.java | 23 ++++++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/java/src/json/ext/ByteListTranscoder.java b/java/src/json/ext/ByteListTranscoder.java index 78d8037c..7ee9de34 100644 --- a/java/src/json/ext/ByteListTranscoder.java +++ b/java/src/json/ext/ByteListTranscoder.java @@ -143,9 +143,11 @@ protected void quoteStart() { * until the character before it. 
*/ protected void quoteStop(int endPos) throws IOException { + int quoteStart = this.quoteStart; if (quoteStart != -1) { + ByteList src = this.src; append(src.unsafeBytes(), src.begin() + quoteStart, endPos - quoteStart); - quoteStart = -1; + this.quoteStart = -1; } } diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index afa14e56..21005203 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -107,10 +107,12 @@ final class StringEncoder extends ByteListTranscoder { //First byte of a 4+ byte code point 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, }; + private static final byte[] BACKSLASH_U2028 = "\\u2028".getBytes(StandardCharsets.US_ASCII); private static final byte[] BACKSLASH_U2029 = "\\u2029".getBytes(StandardCharsets.US_ASCII); private final boolean asciiOnly, scriptSafe; + private final byte[] escapeTable; OutputStream out; @@ -137,6 +139,11 @@ final class StringEncoder extends ByteListTranscoder { StringEncoder(boolean asciiOnly, boolean scriptSafe) { this.asciiOnly = asciiOnly; this.scriptSafe = scriptSafe; + if (asciiOnly) { + escapeTable = scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ASCII_ONLY_ESCAPE_TABLE; + } else { + escapeTable = scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ESCAPE_TABLE; + } } // C: generate_json_string @@ -151,9 +158,9 @@ void generate(ThreadContext context, RubyString object, OutputStream buffer) thr case StringSupport.CR_7BIT: case StringSupport.CR_VALID: if (asciiOnly) { - encodeASCII(byteList, scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ASCII_ONLY_ESCAPE_TABLE); + encodeASCII(byteList); } else { - encode(byteList, scriptSafe ? 
SCRIPT_SAFE_ESCAPE_TABLE : ESCAPE_TABLE); + encode(byteList); } break; default: @@ -201,9 +208,10 @@ private static RubyString tryWeirdEncodings(ThreadContext context, RubyString st } // C: convert_UTF8_to_JSON - void encode(ByteList src, byte[] escape_table) throws IOException { + void encode(ByteList src) throws IOException { byte[] hexdig = HEX; byte[] scratch = aux; + byte[] escapeTable = this.escapeTable; byte[] ptrBytes = src.unsafeBytes(); int ptr = src.begin(); @@ -211,10 +219,10 @@ void encode(ByteList src, byte[] escape_table) throws IOException { int beg = 0; int pos = 0; - + while (pos < len) { int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]); - int ch_len = escape_table[ch]; + int ch_len = escapeTable[ch]; /* JSON encoding */ if (ch_len > 0) { @@ -278,9 +286,10 @@ private int flushPos(int pos, int beg, byte[] ptrBytes, int ptr, int size) throw } // C: convert_UTF8_to_ASCII_only_JSON - void encodeASCII(ByteList src, byte[] escape_table) throws IOException { + void encodeASCII(ByteList src) throws IOException { byte[] hexdig = HEX; byte[] scratch = aux; + byte[] escapeTable = this.escapeTable; byte[] ptrBytes = src.unsafeBytes(); int ptr = src.begin(); @@ -291,7 +300,7 @@ void encodeASCII(ByteList src, byte[] escape_table) throws IOException { while (pos < len) { int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]); - int ch_len = escape_table[ch]; + int ch_len = escapeTable[ch]; if (ch_len != 0) { switch (ch_len) { From 0133cbcfd5fda0d3889f5f5e44af460ace75464d Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 16 Jan 2025 16:42:59 -0600 Subject: [PATCH 21/24] Refactor to avoid repeated boolean checks --- java/src/json/ext/Generator.java | 4 +- java/src/json/ext/StringEncoder.java | 247 +++--------------- java/src/json/ext/StringEncoderAsciiOnly.java | 116 ++++++++ 3 files changed, 158 insertions(+), 209 deletions(-) create mode 100644 java/src/json/ext/StringEncoderAsciiOnly.java diff --git a/java/src/json/ext/Generator.java 
b/java/src/json/ext/Generator.java index 90c5ccfc..956efe10 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -185,7 +185,9 @@ public byte[] getCharBytes() { public StringEncoder getStringEncoder(ThreadContext context) { if (stringEncoder == null) { GeneratorState state = getState(context); - stringEncoder = new StringEncoder(state.asciiOnly(), state.scriptSafe()); + stringEncoder = state.asciiOnly() ? + new StringEncoderAsciiOnly(state.scriptSafe()) : + new StringEncoder(state.scriptSafe()); } return stringEncoder; } diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index 21005203..d178d0bd 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -9,6 +9,7 @@ import org.jcodings.specific.ASCIIEncoding; import org.jcodings.specific.USASCIIEncoding; import org.jcodings.specific.UTF8Encoding; +import org.jruby.Ruby; import org.jruby.RubyException; import org.jruby.RubyString; import org.jruby.exceptions.RaiseException; @@ -25,8 +26,8 @@ * to another ByteList. The source string is fully checked for UTF-8 validity, * and throws a GeneratorError if any problem is found. 
*/ -final class StringEncoder extends ByteListTranscoder { - private static final int CHAR_LENGTH_MASK = 7; +class StringEncoder extends ByteListTranscoder { + protected static final int CHAR_LENGTH_MASK = 7; private static final byte[] BACKSLASH_DOUBLEQUOTE = {'\\', '"'}; private static final byte[] BACKSLASH_BACKSLASH = {'\\', '\\'}; private static final byte[] BACKSLASH_FORWARDSLASH = {'\\', '/'}; @@ -111,39 +112,30 @@ final class StringEncoder extends ByteListTranscoder { private static final byte[] BACKSLASH_U2028 = "\\u2028".getBytes(StandardCharsets.US_ASCII); private static final byte[] BACKSLASH_U2029 = "\\u2029".getBytes(StandardCharsets.US_ASCII); - private final boolean asciiOnly, scriptSafe; - private final byte[] escapeTable; + protected final byte[] escapeTable; OutputStream out; // Escaped characters will reuse this array, to avoid new allocations // or appending them byte-by-byte - private final byte[] aux = + protected final byte[] aux = new byte[] {/* First Unicode character */ '\\', 'u', 0, 0, 0, 0, /* Second unicode character (for surrogate pairs) */ '\\', 'u', 0, 0, 0, 0, /* "\X" characters */ '\\', 0}; - // offsets on the array above - private static final int ESCAPE_UNI1_OFFSET = 0; - private static final int ESCAPE_UNI2_OFFSET = ESCAPE_UNI1_OFFSET + 6; - private static final int ESCAPE_CHAR_OFFSET = ESCAPE_UNI2_OFFSET + 6; - /** Array used for code point decomposition in surrogates */ - private final char[] utf16 = new char[2]; - - private static final byte[] HEX = + + protected static final byte[] HEX = new byte[] {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; - StringEncoder(boolean asciiOnly, boolean scriptSafe) { - this.asciiOnly = asciiOnly; - this.scriptSafe = scriptSafe; - if (asciiOnly) { - escapeTable = scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ASCII_ONLY_ESCAPE_TABLE; - } else { - escapeTable = scriptSafe ? 
SCRIPT_SAFE_ESCAPE_TABLE : ESCAPE_TABLE; - } + StringEncoder(boolean scriptSafe) { + this(scriptSafe ? SCRIPT_SAFE_ESCAPE_TABLE : ESCAPE_TABLE); + } + + StringEncoder(byte[] escapeTable) { + this.escapeTable = escapeTable; } // C: generate_json_string @@ -157,11 +149,7 @@ void generate(ThreadContext context, RubyString object, OutputStream buffer) thr switch (object.scanForCodeRange()) { case StringSupport.CR_7BIT: case StringSupport.CR_VALID: - if (asciiOnly) { - encodeASCII(byteList); - } else { - encode(byteList); - } + encode(byteList); break; default: throw Utils.buildGeneratorError(context, object, "source sequence is illegal/malformed utf-8").toThrowable(); @@ -181,9 +169,12 @@ static RubyString ensureValidEncoding(ThreadContext context, RubyString str) { } private static RubyString tryWeirdEncodings(ThreadContext context, RubyString str, Encoding encoding) { + Ruby runtime = context.runtime; + RubyString utf8String; + if (encoding == ASCIIEncoding.INSTANCE) { - utf8String = str.strDup(context.runtime); + utf8String = str.strDup(runtime); utf8String.setEncoding(UTF8Encoding.INSTANCE); switch (utf8String.getCodeRange()) { case StringSupport.CR_7BIT: @@ -191,13 +182,13 @@ private static RubyString tryWeirdEncodings(ThreadContext context, RubyString st case StringSupport.CR_VALID: // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work. 
// TODO: Raise in 3.0.0 - context.runtime.getWarnings().warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); + runtime.getWarnings().warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0"); return utf8String; } } try { - str = (RubyString) str.encode(context, context.runtime.getEncodingService().convertEncodingToRubyEncoding(UTF8Encoding.INSTANCE)); + str = (RubyString) str.encode(context, runtime.getEncodingService().convertEncodingToRubyEncoding(UTF8Encoding.INSTANCE)); } catch (RaiseException re) { RubyException exc = Utils.buildGeneratorError(context, str, re.getMessage()); exc.setCause(re.getException()); @@ -229,24 +220,7 @@ void encode(ByteList src) throws IOException { switch (ch_len) { case 9: { beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1); - switch (ch) { - case '"': appendEscape(BACKSLASH_DOUBLEQUOTE); break; - case '\\': appendEscape(BACKSLASH_BACKSLASH); break; - case '/': appendEscape(BACKSLASH_FORWARDSLASH); break; - case '\b': appendEscape(BACKSLASH_B); break; - case '\f': appendEscape(BACKSLASH_F); break; - case '\n': appendEscape(BACKSLASH_N); break; - case '\r': appendEscape(BACKSLASH_R); break; - case '\t': appendEscape(BACKSLASH_T); break; - default: { - scratch[2] = '0'; - scratch[3] = '0'; - scratch[4] = hexdig[(ch >> 4) & 0xf]; - scratch[5] = hexdig[ch & 0xf]; - append(scratch, 0, 6); - break; - } - } + escapeAscii(ch, scratch, hexdig); break; } case 11: { @@ -280,110 +254,30 @@ void encode(ByteList src) throws IOException { } } - private int flushPos(int pos, int beg, byte[] ptrBytes, int ptr, int size) throws IOException { + protected int flushPos(int pos, int beg, byte[] ptrBytes, int ptr, int size) throws IOException { if (pos > beg) { append(ptrBytes, ptr + beg, pos - beg); } return pos + size; } - // C: convert_UTF8_to_ASCII_only_JSON - void encodeASCII(ByteList src) throws IOException { - byte[] hexdig = HEX; - byte[] scratch = aux; - 
byte[] escapeTable = this.escapeTable; - - byte[] ptrBytes = src.unsafeBytes(); - int ptr = src.begin(); - int len = src.realSize(); - - int beg = 0; - int pos = 0; - - while (pos < len) { - int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]); - int ch_len = escapeTable[ch]; - - if (ch_len != 0) { - switch (ch_len) { - case 9: { - beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1); - switch (ch) { - case '"': appendEscape(BACKSLASH_DOUBLEQUOTE); break; - case '\\': appendEscape(BACKSLASH_BACKSLASH); break; - case '/': appendEscape(BACKSLASH_FORWARDSLASH); break; - case '\b': appendEscape(BACKSLASH_B); break; - case '\f': appendEscape(BACKSLASH_F); break; - case '\n': appendEscape(BACKSLASH_N); break; - case '\r': appendEscape(BACKSLASH_R); break; - case '\t': appendEscape(BACKSLASH_T); break; - default: { - scratch[2] = '0'; - scratch[3] = '0'; - scratch[4] = hexdig[(ch >> 4) & 0xf]; - scratch[5] = hexdig[ch & 0xf]; - append(scratch, 0, 6); - break; - } - } - break; - } - default: { - int wchar = 0; - ch_len = ch_len & CHAR_LENGTH_MASK; - - switch(ch_len) { - case 2: - wchar = ptrBytes[ptr + pos] & 0x1F; - break; - case 3: - wchar = ptrBytes[ptr + pos] & 0x0F; - break; - case 4: - wchar = ptrBytes[ptr + pos] & CHAR_LENGTH_MASK; - break; - } - - for (short i = 1; i < ch_len; i++) { - wchar = (wchar << 6) | (ptrBytes[ptr + pos +i] & 0x3F); - } - - beg = pos = flushPos(pos, beg, ptrBytes, ptr, ch_len); - - if (wchar <= 0xFFFF) { - scratch[2] = hexdig[wchar >> 12]; - scratch[3] = hexdig[(wchar >> 8) & 0xf]; - scratch[4] = hexdig[(wchar >> 4) & 0xf]; - scratch[5] = hexdig[wchar & 0xf]; - append(scratch, 0, 6); - } else { - int hi, lo; - wchar -= 0x10000; - hi = 0xD800 + (wchar >> 10); - lo = 0xDC00 + (wchar & 0x3FF); - - scratch[2] = hexdig[hi >> 12]; - scratch[3] = hexdig[(hi >> 8) & 0xf]; - scratch[4] = hexdig[(hi >> 4) & 0xf]; - scratch[5] = hexdig[hi & 0xf]; - - scratch[8] = hexdig[lo >> 12]; - scratch[9] = hexdig[(lo >> 8) & 0xf]; - scratch[10] = hexdig[(lo >> 4) & 
0xf]; - scratch[11] = hexdig[lo & 0xf]; - - append(scratch, 0, 12); - } - - break; - } - } - } else { - pos++; + protected void escapeAscii(int ch, byte[] scratch, byte[] hexdig) throws IOException { + switch (ch) { + case '"': appendEscape(BACKSLASH_DOUBLEQUOTE); break; + case '\\': appendEscape(BACKSLASH_BACKSLASH); break; + case '/': appendEscape(BACKSLASH_FORWARDSLASH); break; + case '\b': appendEscape(BACKSLASH_B); break; + case '\f': appendEscape(BACKSLASH_F); break; + case '\n': appendEscape(BACKSLASH_N); break; + case '\r': appendEscape(BACKSLASH_R); break; + case '\t': appendEscape(BACKSLASH_T); break; + default: { + scratch[2] = '0'; + scratch[3] = '0'; + scratch[4] = hexdig[(ch >> 4) & 0xf]; + scratch[5] = hexdig[ch & 0xf]; + append(scratch, 0, 6); + break; } } - - if (beg < len) { - append(ptrBytes, ptr + beg, len - beg); - } } private void appendEscape(byte[] escape) throws IOException { @@ -398,69 +292,6 @@ protected void append(byte[] origin, int start, int length) throws IOException { out.write(origin, start, length); } - private void handleChar(int c) throws IOException { - switch (c) { - case '"': - case '\\': - escapeChar((char)c); - break; - case '\n': - escapeChar('n'); - break; - case '\r': - escapeChar('r'); - break; - case '\t': - escapeChar('t'); - break; - case '\f': - escapeChar('f'); - break; - case '\b': - escapeChar('b'); - break; - case '/': - if(scriptSafe) { - escapeChar((char)c); - break; - } - case 0x2028: - case 0x2029: - if (scriptSafe) { - quoteStop(charStart); - escapeUtf8Char(c); - break; - } - default: - if (c >= 0x20 && c <= 0x7f || - (c >= 0x80 && !asciiOnly)) { - quoteStart(); - } else { - quoteStop(charStart); - escapeUtf8Char(c); - } - } - } - - private void escapeChar(char c) throws IOException { - quoteStop(charStart); - aux[ESCAPE_CHAR_OFFSET + 1] = (byte)c; - append(aux, ESCAPE_CHAR_OFFSET, 2); - } - - private void escapeUtf8Char(int codePoint) throws IOException { - int numChars = Character.toChars(codePoint, 
utf16, 0); - escapeCodeUnit(utf16[0], ESCAPE_UNI1_OFFSET + 2); - if (numChars > 1) escapeCodeUnit(utf16[1], ESCAPE_UNI2_OFFSET + 2); - append(aux, ESCAPE_UNI1_OFFSET, 6 * numChars); - } - - private void escapeCodeUnit(char c, int auxOffset) { - for (int i = 0; i < 4; i++) { - aux[auxOffset + i] = HEX[(c >>> (12 - 4 * i)) & 0xf]; - } - } - @Override protected RaiseException invalidUtf8(ThreadContext context) { return Utils.newException(context, Utils.M_GENERATOR_ERROR, "source sequence is illegal/malformed utf-8"); diff --git a/java/src/json/ext/StringEncoderAsciiOnly.java b/java/src/json/ext/StringEncoderAsciiOnly.java new file mode 100644 index 00000000..de1af284 --- /dev/null +++ b/java/src/json/ext/StringEncoderAsciiOnly.java @@ -0,0 +1,116 @@ +/* + * This code is copyrighted work by Daniel Luz . + * + * Distributed under the Ruby license: https://www.ruby-lang.org/en/about/license.txt + */ +package json.ext; + +import org.jcodings.Encoding; +import org.jcodings.specific.ASCIIEncoding; +import org.jcodings.specific.USASCIIEncoding; +import org.jcodings.specific.UTF8Encoding; +import org.jruby.RubyException; +import org.jruby.RubyString; +import org.jruby.exceptions.RaiseException; +import org.jruby.runtime.ThreadContext; +import org.jruby.util.ByteList; +import org.jruby.util.StringSupport; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; + +/** + * An encoder that reads from the given source and outputs its representation + * to another ByteList. The source string is fully checked for UTF-8 validity, + * and throws a GeneratorError if any problem is found. + */ +final class StringEncoderAsciiOnly extends StringEncoder { + StringEncoderAsciiOnly(boolean scriptSafe) { + super(scriptSafe ? 
SCRIPT_SAFE_ESCAPE_TABLE : ASCII_ONLY_ESCAPE_TABLE); + } + + // C: convert_UTF8_to_ASCII_only_JSON + void encode(ByteList src) throws IOException { + byte[] hexdig = HEX; + byte[] scratch = aux; + byte[] escapeTable = this.escapeTable; + + byte[] ptrBytes = src.unsafeBytes(); + int ptr = src.begin(); + int len = src.realSize(); + + int beg = 0; + int pos = 0; + + while (pos < len) { + int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]); + int ch_len = escapeTable[ch]; + + if (ch_len != 0) { + switch (ch_len) { + case 9: { + beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1); + escapeAscii(ch, scratch, hexdig); + break; + } + default: { + int wchar = 0; + ch_len = ch_len & CHAR_LENGTH_MASK; + + switch(ch_len) { + case 2: + wchar = ptrBytes[ptr + pos] & 0x1F; + break; + case 3: + wchar = ptrBytes[ptr + pos] & 0x0F; + break; + case 4: + wchar = ptrBytes[ptr + pos] & CHAR_LENGTH_MASK; + break; + } + + for (short i = 1; i < ch_len; i++) { + wchar = (wchar << 6) | (ptrBytes[ptr + pos +i] & 0x3F); + } + + beg = pos = flushPos(pos, beg, ptrBytes, ptr, ch_len); + + if (wchar <= 0xFFFF) { + scratch[2] = hexdig[wchar >> 12]; + scratch[3] = hexdig[(wchar >> 8) & 0xf]; + scratch[4] = hexdig[(wchar >> 4) & 0xf]; + scratch[5] = hexdig[wchar & 0xf]; + append(scratch, 0, 6); + } else { + int hi, lo; + wchar -= 0x10000; + hi = 0xD800 + (wchar >> 10); + lo = 0xDC00 + (wchar & 0x3FF); + + scratch[2] = hexdig[hi >> 12]; + scratch[3] = hexdig[(hi >> 8) & 0xf]; + scratch[4] = hexdig[(hi >> 4) & 0xf]; + scratch[5] = hexdig[hi & 0xf]; + + scratch[8] = hexdig[lo >> 12]; + scratch[9] = hexdig[(lo >> 8) & 0xf]; + scratch[10] = hexdig[(lo >> 4) & 0xf]; + scratch[11] = hexdig[lo & 0xf]; + + append(scratch, 0, 12); + } + + break; + } + } + } else { + pos++; + } + } + + if (beg < len) { + append(ptrBytes, ptr + beg, len - beg); + } + } +} From 9de31202ff074a84b6eba4f41b22441c7efc53b3 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 16 Jan 2025 16:54:37 -0600 Subject: [PATCH 22/24] 
Eliminate memory accesses for digits The math is much faster here than array access, due to bounds checking and pointer dereferencing. --- java/src/json/ext/Generator.java | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index 956efe10..e9940810 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -301,8 +301,9 @@ void generate(ThreadContext context, Session session, RubyFixnum object, OutputS private static void generateFixnum(Session session, RubyFixnum object, OutputStream buffer) throws IOException { long i = object.getLongValue(); if (i == 0) { - buffer.write(ZERO_BYTES); + buffer.write('0'); } else if (i == Long.MIN_VALUE) { + // special case to avoid -i buffer.write(MIN_VALUE_BYTES_RADIX_10); } else { byte[] charBytes = session.getCharBytes(); @@ -310,32 +311,27 @@ private static void generateFixnum(Session session, RubyFixnum object, OutputStr } } - private static final byte[] ZERO_BYTES = new byte[] {(byte)'0'}; - private static final byte[] MIN_VALUE_BYTES_RADIX_10; - - static { - MIN_VALUE_BYTES_RADIX_10 = ByteList.plain(Long.toString(Long.MIN_VALUE, 10)); - } + private static final byte[] MIN_VALUE_BYTES_RADIX_10 = ByteList.plain(Long.toString(Long.MIN_VALUE, 10)); // C: fbuffer_append_long static void appendFixnum(OutputStream buffer, byte[] buf, long number) throws IOException { - int buffer_end = buf.length; - int len = fltoa(number, buf, buffer_end - 1); - buffer.write(buf, buffer_end - len, len); + int end = buf.length; + int len = fltoa(number, buf, end); + buffer.write(buf, end - len, len); } static int fltoa(long number, byte[] buf, int end) { - long sign = number; + boolean negative = number < 0; int tmp = end; - if (sign < 0) number = -number; - do buf[tmp--] = (byte) digits[(int) (number % 10)]; while ((number /= 10) != 0); - if (sign < 0) buf[tmp--] = '-'; + if (negative) number = -number; + 
do { + buf[--tmp] = (byte) ((int) (number % 10) + '0'); + } while ((number /= 10) != 0); + if (negative) buf[--tmp] = '-'; return end - tmp; } - private static final char[] digits = {'0', '1','2','3','4','5','6','7','8','9'}; - private static class FloatHandler extends Handler { @Override void generate(ThreadContext context, Session session, RubyFloat object, OutputStream buffer) throws IOException { From d0a718c1209ee64584950482f18911becded634e Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 16 Jan 2025 17:39:12 -0600 Subject: [PATCH 23/24] Loosen visibility to avoid accessor methods Java will generate accessor methods for private fields, burning some inlining budget. --- java/src/json/ext/Generator.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index e9940810..cf7b5255 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -298,7 +298,7 @@ void generate(ThreadContext context, Session session, RubyFixnum object, OutputS } } - private static void generateFixnum(Session session, RubyFixnum object, OutputStream buffer) throws IOException { + static void generateFixnum(Session session, RubyFixnum object, OutputStream buffer) throws IOException { long i = object.getLongValue(); if (i == 0) { buffer.write('0'); @@ -339,7 +339,7 @@ void generate(ThreadContext context, Session session, RubyFloat object, OutputSt } } - private static void generateFloat(ThreadContext context, Session session, RubyFloat object, OutputStream buffer) throws IOException { + static void generateFloat(ThreadContext context, Session session, RubyFloat object, OutputStream buffer) throws IOException { double value = object.getValue(); if (Double.isInfinite(value) || Double.isNaN(value)) { @@ -370,7 +370,7 @@ void generate(ThreadContext context, Session session, RubyArray obj } } - private static void generateArray(ThreadContext context, 
Session session, RubyArray object, OutputStream buffer) throws IOException { + static void generateArray(ThreadContext context, Session session, RubyArray object, OutputStream buffer) throws IOException { GeneratorState state = session.getState(context); int depth = state.increaseDepth(context); @@ -431,7 +431,7 @@ void generate(ThreadContext context, final Session session, RubyHash object, fin } } - private static void generateHash(ThreadContext context, Session session, RubyHash object, OutputStream buffer) throws IOException { + static void generateHash(ThreadContext context, Session session, RubyHash object, OutputStream buffer) throws IOException { final GeneratorState state = session.getState(context); final int depth = state.increaseDepth(context); @@ -522,7 +522,7 @@ void generate(ThreadContext context, Session session, RubyString object, OutputS } } - private static void generateString(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException { + static void generateString(ThreadContext context, Session session, RubyString object, OutputStream buffer) throws IOException { session.getStringEncoder(context).generate(context, object, buffer); } @@ -538,12 +538,12 @@ void generate(ThreadContext context, Session session, IRubyObject object, Output } } - private static RubyString generateObjectNew(ThreadContext context, Session session, IRubyObject object) { + static RubyString generateObjectNew(ThreadContext context, Session session, IRubyObject object) { RubyString str = object.asString(); return STRING_HANDLER.generateNew(context, session, str); } - private static void generateObject(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + static void generateObject(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { generateString(context, session, object.asString(), buffer); } @@ -559,7 +559,7 @@ void 
generate(ThreadContext context, Session session, IRubyObject object, Output } } - private static RubyString generateGenericNew(ThreadContext context, Session session, IRubyObject object) { + static RubyString generateGenericNew(ThreadContext context, Session session, IRubyObject object) { GeneratorState state = session.getState(context); if (state.strict()) { throw Utils.buildGeneratorError(context, object, object + " not allowed in JSON").toThrowable(); @@ -572,7 +572,7 @@ private static RubyString generateGenericNew(ThreadContext context, Session sess } } - private static void generateGeneric(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { + static void generateGeneric(ThreadContext context, Session session, IRubyObject object, OutputStream buffer) throws IOException { RubyString result = generateGenericNew(context, session, object); ByteList bytes = result.getByteList(); buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); From 67a00dabaa9238c731efcb0561fe18c51ee7867d Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 16 Jan 2025 18:17:50 -0600 Subject: [PATCH 24/24] Modify parser bench to work without oj or rapidjson --- benchmark/parser.rb | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/benchmark/parser.rb b/benchmark/parser.rb index bacb8e9e..b3597def 100644 --- a/benchmark/parser.rb +++ b/benchmark/parser.rb @@ -1,7 +1,13 @@ require "benchmark/ips" require "json" -require "oj" -require "rapidjson" +begin + require "oj" +rescue LoadError +end +begin + require "rapidjson" +rescue LoadError +end if ENV["ONLY"] RUN = ENV["ONLY"].split(/[,: ]/).map{|x| [x.to_sym, true] }.to_h @@ -18,9 +24,13 @@ def benchmark_parsing(name, json_output) Benchmark.ips do |x| x.report("json") { JSON.parse(json_output) } if RUN[:json] - x.report("oj") { Oj.load(json_output) } if RUN[:oj] - x.report("Oj::Parser") { Oj::Parser.new(:usual).parse(json_output) } if 
RUN[:oj] - x.report("rapidjson") { RapidJSON.parse(json_output) } if RUN[:rapidjson] + if defined?(Oj) + x.report("oj") { Oj.load(json_output) } if RUN[:oj] + x.report("Oj::Parser") { Oj::Parser.new(:usual).parse(json_output) } if RUN[:oj] + end + if defined?(RapidJSON) + x.report("rapidjson") { RapidJSON.parse(json_output) } if RUN[:rapidjson] + end x.compare!(order: :baseline) end puts