diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/FixedICUDecimalFormat.java b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/FixedICUDecimalFormat.java new file mode 100644 index 0000000000..b295ba0869 --- /dev/null +++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/FixedICUDecimalFormat.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.daffodil.lib.util; + +import com.ibm.icu.text.DecimalFormat; +import com.ibm.icu.text.DecimalFormatSymbols; + +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class FixedICUDecimalFormat { + + /** + * This regex enables us to correct a problem in ICUs interpretation of the pattern in the case + * where the pattern has no digits before the decimal points, such as "#.##" + * so that there should be zero leading digits if the integer part is 0. + *

+ * See https://unicode-org.atlassian.net/browse/ICU-22558 + *

+ * It captures unescaped digits or '#' characters + * preceding a non-escaped decimal point in an ICU4J pattern string. + *

+ * The whole regex (without java escapes) is {@code (? + *

  • {@code (? + *
  • {@code ([0-9#]*)}: Capture zero or more consecutive digits or '#' characters in group 1.
  • + *
  • {@code (? + * + * + * With this pattern: + * + * + * The match will fail only if there is no non-escaped decimal point. + * + * Package private for unit testing. + */ + static Pattern pattern = Pattern.compile("(? + * See https://unicode-org.atlassian.net/browse/ICU-22558 + *

    + * + * @param patternString - an ICU pattern for a DecimalFormat, to be created and fixed (if needed) + * @return a DecimalFormat properly initialized by the pattern. + */ + public static DecimalFormat fromPattern(String patternString) { + DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(); // default locale + return fromPattern(patternString, symbols); + } + /** + * Package private helper version that takes symbols so that you can define a locale. + * This allows tests to not have to adjust for locale decimal points "." vs. ",", + * by specifying a locale. But it isn't needed for regular usage. + * + * @param patternString - ICU pattern to fix (if needed) + * @param symbols - used to specify locale (because decimal points can be "." or "," + * @return a DecimalFormat properly initialized by the pattern. + */ + static DecimalFormat fromPattern(String patternString, DecimalFormatSymbols symbols) { + DecimalFormat decimalFormat = new DecimalFormat(patternString, symbols); + Matcher matcher = pattern.matcher(patternString); + if (matcher.find()) { + // There IS a decimal point. + String digits = matcher.group(1); // digits or # unescaped, before the decimal point + assert(Objects.equals(".", matcher.group(2))); // 2nd group matches the decimal point itself + long count = digits.chars().filter(Character::isDigit).count(); + if (count == 0) { + // The decimal point is not preceded by any required digits. + // This is the fix ICU is getting wrong. + // + // Note even when ICU fixes this, this code won't break. It's just + // unnecessary. + // + // + decimalFormat.setMinimumIntegerDigits(0); + } + } + return decimalFormat; + } +} diff --git a/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestFixedICUDecimalFormat.java b/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestFixedICUDecimalFormat.java new file mode 100644 index 0000000000..e1721b8efc --- /dev/null +++ b/daffodil-lib/src/test/scala/org/apache/daffodil/lib/util/TestFixedICUDecimalFormat.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.daffodil.lib.util; + +import com.ibm.icu.text.DecimalFormat; +import com.ibm.icu.text.DecimalFormatSymbols; +import org.junit.Test; +import static junit.framework.TestCase.assertEquals; +import static org.junit.Assert.*; + +import java.util.Locale; +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Tests about ICU-22558 + * + * https://unicode-org.atlassian.net/browse/ICU-22558 + */ +public class TestFixedICUDecimalFormat { + + // Create DecimalFormatSymbols for a Locale where the decimal separator is a dot + DecimalFormatSymbols symbols = new DecimalFormatSymbols(Locale.US); + + /** + * Illustrates the bug in ICU. + */ + @Test + public void testDecimalFormatWithHash() { + // Create DecimalFormat instance with the pattern "#.##" + DecimalFormat decimalFormat = new DecimalFormat("#.##", symbols); + + // Convert the number 0.12 to String + String formattedNumber = decimalFormat.format(0.12); + + // This assert tests for the current behavior. (which is buggy) + assertEquals("0.12", formattedNumber); // Wrong answer: See ICU-22558 + // Assert that the formatted string is ".12" + if (formattedNumber == ".12") { + // see https://unicode-org.atlassian.net/browse/ICU-22558 + fail("Bug ICU-22558 has been fixed. This test set is no longer needed to illustrate the problem."); + } + } + + + Pattern pattern = FixedICUDecimalFormat.pattern; + + /** + * Tests to be sure the regex that is used in the fix is robust. + */ + @Test + public void testExtractDecimalPointAndPrecedingUnescapedDigits0() { + Matcher matcher = pattern.matcher("\\."); + assertFalse(matcher.find()); + } + + @Test + public void testExtractDecimalPointAndPrecedingUnescapedDigits1() { + Matcher matcher = pattern.matcher("#.##"); + assertTrue(matcher.find()); + assertEquals("#", matcher.group(1)); + assertEquals(".", matcher.group(2)); + } + + @Test + public void testExtractDecimalPointAndPrecedingUnescapedDigits2() { + Matcher matcher = pattern.matcher("\\#.##"); + assertTrue(matcher.find()); + assertEquals("", matcher.group(1)); + assertEquals(".", matcher.group(2)); + } + + @Test + public void testExtractDecimalPointAndPrecedingUnescapedChar3() { + Matcher matcher = pattern.matcher("'text'#.##"); + assertTrue(matcher.find()); + assertEquals("#", matcher.group(1)); + assertEquals(".", matcher.group(2)); } + + @Test + public void testExtractDecimalPointAndPrecedingUnescapedChar4() { + Matcher matcher = pattern.matcher(".##"); + assertTrue(matcher.find()); + assertEquals("", matcher.group(1)); + assertEquals(".", matcher.group(2)); + } + + @Test + public void testExtractDecimalPointAndPrecedingUnescapedChar5() { + Matcher matcher = pattern.matcher("."); + assertTrue(matcher.find()); + assertEquals("", matcher.group(1)); + assertEquals(".", matcher.group(2)); + } + + @Test + public void testExtractDecimalPointAndPrecedingUnescapedChar6() { + Matcher matcher = pattern.matcher("5.#"); + assertTrue(matcher.find()); + assertEquals("5", matcher.group(1)); + assertEquals(".", matcher.group(2)); + } + + @Test + public void testExtractDecimalPointAndPrecedingUnescapedChar7() { + Matcher matcher = pattern.matcher("5#.#"); + assertTrue(matcher.find()); + assertEquals("5#", matcher.group(1)); + assertEquals(".", matcher.group(2)); + } + + @Test + public void testExtractDecimalPointAndPrecedingUnescapedChar8() { + Matcher matcher = pattern.matcher("$\\.$#7\\##9#5#.#"); + assertTrue(matcher.find()); + assertEquals("#9#5#", matcher.group(1)); + assertEquals(".", matcher.group(2)); + } + + + /** + * Show that the fix works. + */ + @Test + public void testFixedDecimalFormatWithHash() { + DecimalFormat decimalFormat = FixedICUDecimalFormat.fromPattern("#.##", symbols); + String formattedNumber = decimalFormat.format(0.12); + assertEquals(".12", formattedNumber); + } + + /** + * Show that the fix doesn't break patterns with leading digits. + */ + @Test + public void testFixedDecimalFormatWithZero() { + DecimalFormat decimalFormat = FixedICUDecimalFormat.fromPattern("0.##", symbols); + String formattedNumber = decimalFormat.format(0.12); + assertEquals("0.12", formattedNumber); + } +} + + + +