Skip to content

Commit

Permalink
[iOS][globalization] Fix IndexOf on empty strings on iOS to return -1
Browse files Browse the repository at this point in the history
  • Loading branch information
matouskozak authored Jan 30, 2025
1 parent e3f3598 commit 4e01649
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,17 @@ public class CompareInfoIndexOfTests : CompareInfoTestsBase
{
public static IEnumerable<object[]> IndexOf_TestData()
{
// Empty string
// Empty string, invariant
yield return new object[] { s_invariantCompare, "foo", "", 0, 3, CompareOptions.None, 0, 0 };
yield return new object[] { s_invariantCompare, "foo", "", 2, 1, CompareOptions.None, 2, 0 };
yield return new object[] { s_invariantCompare, "", "", 0, 0, CompareOptions.None, 0, 0 };
yield return new object[] { s_invariantCompare, "", "foo", 0, 0, CompareOptions.None, -1, 0 };

// Empty string, using non-invariant (s_germanCompare) CompareInfo to test the ICU path
yield return new object[] { s_germanCompare, "foo", "", 0, 3, CompareOptions.None, 0, 0 };
yield return new object[] { s_germanCompare, "foo", "", 2, 1, CompareOptions.None, 2, 0 };
yield return new object[] { s_germanCompare, "", "", 0, 0, CompareOptions.None, 0, 0 };
yield return new object[] { s_germanCompare, "", "foo", 0, 0, CompareOptions.None, -1, 0 };

// OrdinalIgnoreCase
yield return new object[] { s_invariantCompare, "Hello", "l", 0, 5, CompareOptions.OrdinalIgnoreCase, 2, 1 };
Expand Down Expand Up @@ -138,8 +145,8 @@ public static IEnumerable<object[]> IndexOf_TestData()
}

// Inputs where matched length does not equal value string length
yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "stra\u00DFe", 0, 23, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 4, 7 };
yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Strasse", 0, 21, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 4, 6 };
yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "stra\u00DFe", 0, 23, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 4, 7 };
yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Strasse", 0, 21, CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, 4, 6 };
if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform)
{
yield return new object[] { s_invariantCompare, "abcdzxyz", "\u01F3", 0, 8, CompareOptions.IgnoreNonSpace, 3, 2 };
Expand All @@ -153,7 +160,7 @@ public static IEnumerable<object[]> IndexOf_Aesc_Ligature_TestData()
{
bool useNls = PlatformDetection.IsNlsGlobalization;
// Searches for the ligature \u00C6
string source1 = "Is AE or ae the same as \u00C6 or \u00E6?"; // 3 failures here
string source1 = "Is AE or ae the same as \u00C6 or \u00E6?";
yield return new object[] { s_invariantCompare, source1, "AE", 8, 18, CompareOptions.None, useNls ? 24 : -1, useNls ? 1 : 0};
yield return new object[] { s_invariantCompare, source1, "ae", 8, 18, CompareOptions.None, 9 , 2};
yield return new object[] { s_invariantCompare, source1, "\u00C6", 8, 18, CompareOptions.None, 24, 1 };
Expand All @@ -171,7 +178,7 @@ public static IEnumerable<object[]> IndexOf_Aesc_Ligature_TestData()
public static IEnumerable<object[]> IndexOf_U_WithDiaeresis_TestData()
{
// Searches for the combining character sequence Latin capital letter U with diaeresis or Latin small letter u with diaeresis.
string source = "Is \u0055\u0308 or \u0075\u0308 the same as \u00DC or \u00FC?"; // 7 failures here
string source = "Is \u0055\u0308 or \u0075\u0308 the same as \u00DC or \u00FC?";
yield return new object[] { s_invariantCompare, source, "U\u0308", 8, 18, CompareOptions.None, 24, 1 };
yield return new object[] { s_invariantCompare, source, "u\u0308", 8, 18, CompareOptions.None, 9, 2 };
yield return new object[] { s_invariantCompare, source, "\u00DC", 8, 18, CompareOptions.None, 24, 1 };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ public static void Contains_Char(string s, char value, bool expected)
[InlineData("Hello", 'e', StringComparison.CurrentCulture, true)]
[InlineData("Hello", 'E', StringComparison.CurrentCulture, false)]
[InlineData("", 'H', StringComparison.CurrentCulture, false)]
[InlineData("", '\u0301', StringComparison.CurrentCulture, false)] // Using non-ASCII character to test ICU path
// CurrentCultureIgnoreCase
[InlineData("Hello", 'H', StringComparison.CurrentCultureIgnoreCase, true)]
[InlineData("Hello", 'Z', StringComparison.CurrentCultureIgnoreCase, false)]
Expand Down
20 changes: 12 additions & 8 deletions src/native/libs/System.Globalization.Native/pal_collation.m
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ int32_t GlobalizationNative_CompareStringNative(const uint16_t* localeName, int3
}
}

/**
* Removes zero-width and other weightless characters such as U+200B (Zero Width Space),
* U+200C (Zero Width Non-Joiner), U+200D (Zero Width Joiner), U+FEFF (Zero Width No-Break Space),
* and the NUL character from the specified string.
*/
static NSString* RemoveWeightlessCharacters(NSString* source)
{
NSError *error = nil;
Expand All @@ -143,10 +148,9 @@ static int32_t IsIndexFound(int32_t fromBeginning, int32_t foundLocation, int32_

/*
Function: IndexOf
Find detailed explanation how this function works in https://github.com/dotnet/runtime/blob/main/docs/design/features/globalization-hybrid-mode.md
Find a detailed explanation of how this function works at https://github.com/dotnet/runtime/blob/main/docs/design/features/globalization-hybrid-mode.md#string-indexing
*/
Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNameLength, const uint16_t* lpTarget, int32_t cwTargetLength,
const uint16_t* lpSource, int32_t cwSourceLength, int32_t comparisonOptions, int32_t fromBeginning)
Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNameLength, const uint16_t* lpTarget, int32_t cwTargetLength, const uint16_t* lpSource, int32_t cwSourceLength, int32_t comparisonOptions, int32_t fromBeginning)
{
@autoreleasepool
{
Expand All @@ -158,6 +162,9 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam
return result;
}
NSStringCompareOptions options = ConvertFromCompareOptionsToNSStringCompareOptions(comparisonOptions, true);
if (!fromBeginning) // LastIndexOf
options |= NSBackwardsSearch;

NSString *searchString = [NSString stringWithCharacters: lpTarget length: (NSUInteger)cwTargetLength];
NSString *searchStrCleaned = RemoveWeightlessCharacters(searchString);
NSString *sourceString = [NSString stringWithCharacters: lpSource length: (NSUInteger)cwSourceLength];
Expand All @@ -168,7 +175,7 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam
searchStrCleaned = ConvertToKatakana(searchStrCleaned);
}

if (sourceStrCleaned.length == 0 || searchStrCleaned.length == 0)
if (searchStrCleaned.length == 0)
{
result.location = fromBeginning ? 0 : (int32_t)sourceString.length;
return result;
Expand All @@ -178,9 +185,6 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam
NSString *searchStrPrecomposed = searchStrCleaned.precomposedStringWithCanonicalMapping;
NSString *sourceStrPrecomposed = sourceStrCleaned.precomposedStringWithCanonicalMapping;

// last index
if (!fromBeginning)
options |= NSBackwardsSearch;

// check if there is a possible match and return -1 if not
// doesn't matter which normalization form is used here
Expand Down Expand Up @@ -233,7 +237,7 @@ Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNam
result.location = (int32_t)precomposedRange.location;
result.length = (int32_t)precomposedRange.length;
if (!(comparisonOptions & IgnoreCase))
return result;
return result;
}

// check if sourceString has decomposed form of characters and searchString has precomposed form of characters
Expand Down

0 comments on commit 4e01649

Please sign in to comment.