diff --git a/release-notes/CREDITS-2.x b/release-notes/CREDITS-2.x
index 400c0cec40..002eedbd09 100644
--- a/release-notes/CREDITS-2.x
+++ b/release-notes/CREDITS-2.x
@@ -456,3 +456,8 @@ Jared Stehler (@jaredstehler)
 Zhanghao (@zhangOranges)
   * Contributed #1305: Make helper methods of `WriterBasedJsonGenerator` non-final to allow overriding
    (2.18.0)
+
+Justin Gosselin (@jgosselin-accesso)
+  * Reported #1359: Non-surrogate characters being incorrectly combined when
+    `JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
+   (2.18.2)
diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x
index b16ece2c27..3c53c5d767 100644
--- a/release-notes/VERSION-2.x
+++ b/release-notes/VERSION-2.x
@@ -18,6 +18,10 @@ a pure JSON library.
 
 #1353: Use fastdoubleparser 1.0.90
  (fixed by @pjfanning)
+#1359: Non-surrogate characters being incorrectly combined when
+  `JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8` is enabled
+ (reported by Justin G)
+ (fixed by @pjfanning)
 
 2.18.0 (26-Sep-2024)
 
diff --git a/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java b/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java
index bfdd9c2dd8..ec7454811c 100644
--- a/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java
+++ b/src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java
@@ -1509,7 +1509,7 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
                 outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
             } else {
                 // 3- or 4-byte character
-                if (_isSurrogateChar(ch)) {
+                if (_isStartOfSurrogatePair(ch)) {
                     final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
                     if (combineSurrogates && offset < end) {
                         char highSurrogate = (char) ch;
@@ -1557,7 +1557,7 @@ private final void _writeStringSegment2(final String text, int offset, final int
                 outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
             } else {
                 // 3- or 4-byte character
-                if (_isSurrogateChar(ch)) {
+                if (_isStartOfSurrogatePair(ch)) {
                     final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
                     if (combineSurrogates && offset < end) {
                         char highSurrogate = (char) ch;
@@ -2247,8 +2247,9 @@ private byte[] getHexBytes() {
     }
 
     // @since 2.18
-    private boolean _isSurrogateChar(int ch) {
-        return (ch & 0xD800) == 0xD800;
+    private static boolean _isStartOfSurrogatePair(final int ch) {
+        // In 0xD800 - 0xDBFF range?
+        return (ch & 0xFC00) == 0xD800;
     }
 }
 
diff --git a/src/test/java/com/fasterxml/jackson/core/json/Surrogate223Test.java b/src/test/java/com/fasterxml/jackson/core/write/SurrogateWrite223Test.java
similarity index 65%
rename from src/test/java/com/fasterxml/jackson/core/json/Surrogate223Test.java
rename to src/test/java/com/fasterxml/jackson/core/write/SurrogateWrite223Test.java
index 38e36adba5..aa5b57e4a2 100644
--- a/src/test/java/com/fasterxml/jackson/core/json/Surrogate223Test.java
+++ b/src/test/java/com/fasterxml/jackson/core/write/SurrogateWrite223Test.java
@@ -1,17 +1,19 @@
-package com.fasterxml.jackson.core.json;
+package com.fasterxml.jackson.core.write;
 
 import java.io.ByteArrayOutputStream;
 import java.io.StringWriter;
 import java.io.Writer;
 
 import com.fasterxml.jackson.core.*;
+import com.fasterxml.jackson.core.json.JsonWriteFeature;
 
 import org.junit.jupiter.api.Test;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
-class Surrogate223Test extends JUnit5TestBase
+class SurrogateWrite223Test extends JUnit5TestBase
 {
     private final JsonFactory DEFAULT_JSON_F = newStreamFactory();
 
@@ -90,4 +92,35 @@ void surrogatesCharBacked() throws Exception
         assertToken(JsonToken.END_ARRAY, p.nextToken());
         p.close();
     }
+
+    //https://github.com/FasterXML/jackson-core/issues/1359
+    @Test
+    void checkNonSurrogates() throws Exception {
+        JsonFactory f = JsonFactory.builder()
+                .enable(JsonWriteFeature.COMBINE_UNICODE_SURROGATES_IN_UTF8)
+                .build();
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        try (JsonGenerator gen = f.createGenerator(out)) {
+            gen.writeStartObject();
+
+            // Inside the BMP, beyond surrogate block; 0xFF0C - full-width comma
+            gen.writeStringField("test_full_width", "foo" + new String(Character.toChars(0xFF0C)) + "bar");
+
+            // Inside the BMP, beyond surrogate block; 0xFE6A - small form percent
+            gen.writeStringField("test_small_form", "foo" + new String(Character.toChars(0xFE6A)) + "bar");
+
+            // Inside the BMP, before the surrogate block; 0x3042 - Hiragana A
+            gen.writeStringField("test_hiragana", "foo" + new String(Character.toChars(0x3042)) + "bar");
+
+            // Outside the BMP; 0x1F60A - emoji
+            gen.writeStringField("test_emoji", new String(Character.toChars(0x1F60A)));
+
+            gen.writeEndObject();
+        }
+        String json = out.toString("UTF-8");
+        assertTrue(json.contains("foo\uFF0Cbar"));
+        assertTrue(json.contains("foo\uFE6Abar"));
+        assertTrue(json.contains("foo\u3042bar"));
+        assertTrue(json.contains("\"test_emoji\":\"\uD83D\uDE0A\""));
+    }
 }