Skip to content

Commit

Permalink
ICU-22890 MF2: Add ICU4C test for lone surrogates and fix ICU4J surro…
Browse files Browse the repository at this point in the history
…gate handling

This PR adds a test to ICU4C for handling of lone surrogates,
and also changes ICU4J so that this case is a syntax error (per the spec).

Incidentally fix uninitialized-memory bug in MessageFormatter
(initialize `errors` to nullptr)

Co-authored-by: Tim Chevalier <[email protected]>
  • Loading branch information
FrankYFTang and catamorphism committed Sep 19, 2024
1 parent 3205630 commit af45a5b
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 2 deletions.
2 changes: 1 addition & 1 deletion icu4c/source/i18n/unicode/messageformat2.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ namespace message2 {
// Must be a raw pointer to avoid including the internal header file
// defining StaticErrors
// Owned by `this`
StaticErrors* errors;
StaticErrors* errors = nullptr;

}; // class MessageFormatter

Expand Down
30 changes: 30 additions & 0 deletions icu4c/source/test/intltest/messageformat2test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ TestMessageFormat2::runIndexedTest(int32_t index, UBool exec,
TESTCASE_AUTO(testAPI);
TESTCASE_AUTO(testAPISimple);
TESTCASE_AUTO(testDataModelAPI);
TESTCASE_AUTO(testHighLoneSurrogate);
TESTCASE_AUTO(testLowLoneSurrogate);
TESTCASE_AUTO(dataDrivenTests);
TESTCASE_AUTO_END;
}
Expand Down Expand Up @@ -263,6 +265,34 @@ void TestMessageFormat2::testAPICustomFunctions() {
delete person;
}

// ICU-22890 lone surrogate cause infinity loop
void TestMessageFormat2::testHighLoneSurrogate() {
IcuTestErrorCode errorCode(*this, "testHighLoneSurrogate");
UParseError pe = { 0, 0, {0}, {0} };
// Lone surrogate with only high surrogate
UnicodeString loneSurrogate({0xda02, 0});
icu::message2::MessageFormatter msgfmt1 =
icu::message2::MessageFormatter::Builder(errorCode)
.setPattern(loneSurrogate, pe, errorCode)
.build(errorCode);
UnicodeString result = msgfmt1.formatToString({}, errorCode);
errorCode.expectErrorAndReset(U_MF_SYNTAX_ERROR, "testHighLoneSurrogate");
}

// ICU-22890 lone surrogate cause infinity loop
void TestMessageFormat2::testLowLoneSurrogate() {
IcuTestErrorCode errorCode(*this, "testLowLoneSurrogate");
UParseError pe = { 0, 0, {0}, {0} };
// Lone surrogate with only low surrogate
UnicodeString loneSurrogate({0xdc02, 0});
icu::message2::MessageFormatter msgfmt2 =
icu::message2::MessageFormatter::Builder(errorCode)
.setPattern(loneSurrogate, pe, errorCode)
.build(errorCode);
UnicodeString result = msgfmt2.formatToString({}, errorCode);
errorCode.expectErrorAndReset(U_MF_SYNTAX_ERROR, "testLowLoneSurrogate");
}

void TestMessageFormat2::dataDrivenTests() {
IcuTestErrorCode errorCode(*this, "jsonTests");

Expand Down
2 changes: 2 additions & 0 deletions icu4c/source/test/intltest/messageformat2test.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ class TestMessageFormat2: public IntlTest {
void testMessageFormatDateTimeSkeleton(message2::TestCase::Builder&, IcuTestErrorCode&);
void testMf1Behavior(message2::TestCase::Builder&, IcuTestErrorCode&);

void testHighLoneSurrogate(void);
void testLowLoneSurrogate(void);
}; // class TestMessageFormat2

U_NAMESPACE_BEGIN
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ int peekChar() {
return buffer.charAt(cursor);
}

// Used when checking for unpaired surrogates
int readChar() {
int ch = peekChar();
cursor++;
return (char) ch;
}

int readCodePoint() {
// TODO: remove this?
// START Detect possible infinite loop
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,17 @@ private MFDataModel.PatternPart getPatternPart() throws MFParseException {
}
}

private String getText() {
private String getText() throws MFParseException {
StringBuilder result = new StringBuilder();
while (true) {
int ch = input.readChar();
// Check for unmatched surrogates
checkCondition(!(Character.isHighSurrogate((char) ch)
&& (input.atEnd()
|| !Character.isLowSurrogate((char) input.peekChar()))),
"Unpaired high surrogate");
checkCondition (!Character.isLowSurrogate((char) ch), "Unpaired low surrogate");
input.backup(1);
int cp = input.readCodePoint();
switch (cp) {
case EOF:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,36 @@ public void test() {
mf2.formatToString(Args.NONE));
}

@Test
public void testHighLoneSurrogate() {
try {
MessageFormatter mf2 = MessageFormatter.builder()
.setPattern("\uda02").build();
assertEquals("testHighLoneSurrogate: expected to throw, but didn't",
false, true);
} catch (IllegalArgumentException e) {
// Parse error was thrown, as expected
} catch (Exception e) {
assertEquals("testHighLoneSurrogate: expected IllegalArgumentException "
+ "but threw " + e.toString(), false, true);
}
}

@Test
public void testLowLoneSurrogate() {
try {
MessageFormatter mf2 = MessageFormatter.builder()
.setPattern("\udc02").build();
assertEquals("testLowLoneSurrogate: expected to throw, but didn't",
false, true);
} catch (IllegalArgumentException e) {
// Parse error was thrown, as expected
} catch (Exception e) {
assertEquals("testLowLoneSurrogate: expected IllegalArgumentException "
+ "but threw " + e.toString(), false, true);
}
}

@Test
public void testDateFormat() {
Date expiration = new Date(2022 - 1900, java.util.Calendar.OCTOBER, 27);
Expand Down

0 comments on commit af45a5b

Please sign in to comment.