Skip to content

Commit

Permalink
Reintroduce case sensitive comparison optimization for FrozenDictiona…
Browse files Browse the repository at this point in the history
…ry in some cases (#95232)

* Reintroduce case sensitive comparison optimization for FrozenDictionary in some cases

* Renamed a few symbols and added comments for additional clarity

---------

Co-authored-by: Stephen Toub <[email protected]>
  • Loading branch information
andrewjsaid and stephentoub authored Feb 15, 2024
1 parent 3848f56 commit bbb97a7
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public static AnalysisResults Analyze(
AnalysisResults results;
if (minLength == 0 || !TryUseSubstring(uniqueStrings, ignoreCase, minLength, maxLength, out results))
{
results = CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, 0, 0, isSubstring: false, static (s, _, _) => s.AsSpan());
results = CreateAnalysisResults(uniqueStrings, ignoreCase, minLength, maxLength, 0, 0, static (s, _, _) => s.AsSpan());
}

return results;
Expand Down Expand Up @@ -77,7 +77,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount))
{
results = CreateAnalysisResults(
uniqueStrings, ignoreCase, minLength, maxLength, index, count, isSubstring: true,
uniqueStrings, ignoreCase, minLength, maxLength, index, count,
static (string s, int index, int count) => s.AsSpan(index, count));
return true;
}
Expand All @@ -101,7 +101,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
if (HasSufficientUniquenessFactor(set, uniqueStrings, acceptableNonUniqueCount))
{
results = CreateAnalysisResults(
uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count, isSubstring: true,
uniqueStrings, ignoreCase, minLength, maxLength, comparer.Index, count,
static (string s, int index, int count) => s.AsSpan(s.Length + index, count));
return true;
}
Expand All @@ -115,7 +115,7 @@ private static bool TryUseSubstring(ReadOnlySpan<string> uniqueStrings, bool ign
}

private static AnalysisResults CreateAnalysisResults(
ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, bool isSubstring, GetSpan getSubstringSpan)
ReadOnlySpan<string> uniqueStrings, bool ignoreCase, int minLength, int maxLength, int index, int count, GetSpan getHashString)
{
// Start off by assuming all strings are ASCII
bool allAsciiIfIgnoreCase = true;
Expand All @@ -125,30 +125,42 @@ private static AnalysisResults CreateAnalysisResults(
// substrings are ASCII, so we check each.
if (ignoreCase)
{
// Further, if the ASCII keys (in their entirety) don't contain any letters, then we can
// actually perform the comparison as case-sensitive even if case-insensitive
// was requested, as there's nothing that would compare equally to the substring
// other than the substring itself.
bool canSwitchIgnoreCaseHashToCaseSensitive = !isSubstring;
// Further, if the ASCII keys (in their entirety) don't contain any letters,
// then we can actually perform the comparison as case-sensitive even if
// case-insensitive was requested, as there's nothing that would compare
// equally to the key other than the key itself.
bool canSwitchIgnoreCaseHashToCaseSensitive = true;

foreach (string s in uniqueStrings)
foreach (string uniqueString in uniqueStrings)
{
// Get the span for the substring.
ReadOnlySpan<char> substring = getSubstringSpan(s, index, count);
// Get a span representing the slice of the uniqueString which will be hashed.
ReadOnlySpan<char> hashString = getHashString(uniqueString, index, count);

// If the substring isn't ASCII, bail out to return the results.
if (!IsAllAscii(substring))
// If the slice isn't ASCII, bail out to return the results.
if (!IsAllAscii(hashString))
{
allAsciiIfIgnoreCase = false;
canSwitchIgnoreCaseHashToCaseSensitive = false;
break;
}

// All substrings so far are still ASCII only. If this one contains any ASCII
// letters, mark that we can't switch to case-sensitive.
if (canSwitchIgnoreCaseHashToCaseSensitive && ContainsAnyLetters(substring))
// The hash string is ASCII only. We disable the switch to
// case sensitive if by examining the entire uniqueString we
// find that it is not ASCII, or that it contains ASCII letters.
if (canSwitchIgnoreCaseHashToCaseSensitive)
{
canSwitchIgnoreCaseHashToCaseSensitive = false;
// If count is 0 then uniqueString equals hashString,
// and as we have just checked that IsAllAscii(hashString) is true
// then we know IsAllAscii(uniqueString) must be true,
// so we can skip the check.
if (count > 0 && !IsAllAscii(uniqueString.AsSpan()))
{
canSwitchIgnoreCaseHashToCaseSensitive = false;
}
else if (ContainsAnyAsciiLetters(uniqueString.AsSpan()))
{
canSwitchIgnoreCaseHashToCaseSensitive = false;
}
}
}

Expand Down Expand Up @@ -207,7 +219,7 @@ internal static unsafe bool IsAllAscii(ReadOnlySpan<char> s)
#if NET8_0_OR_GREATER
private static readonly SearchValues<char> s_asciiLetters = SearchValues.Create("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
#endif
internal static bool ContainsAnyLetters(ReadOnlySpan<char> s)
internal static bool ContainsAnyAsciiLetters(ReadOnlySpan<char> s)
{
Debug.Assert(IsAllAscii(s));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@ public static void LeftHandCaseInsensitive()
Assert.Equal(0, r.HashIndex);
Assert.Equal(1, r.HashCount);

r = RunAnalysis(new[] { "0001", "0002", "0003", "0004", "0005", "0006" }, true);
Assert.False(r.RightJustifiedSubstring);
Assert.False(r.IgnoreCase);
Assert.True(r.AllAsciiIfIgnoreCase);
Assert.Equal(3, r.HashIndex);
Assert.Equal(1, r.HashCount);

}

[Fact]
Expand Down Expand Up @@ -226,9 +233,9 @@ public static void IsAllAscii()
[Fact]
public static void ContainsAnyLetters()
{
Assert.True(KeyAnalyzer.ContainsAnyLetters("abc".AsSpan()));
Assert.True(KeyAnalyzer.ContainsAnyLetters("ABC".AsSpan()));
Assert.False(KeyAnalyzer.ContainsAnyLetters("123".AsSpan()));
Assert.True(KeyAnalyzer.ContainsAnyAsciiLetters("abc".AsSpan()));
Assert.True(KeyAnalyzer.ContainsAnyAsciiLetters("ABC".AsSpan()));
Assert.False(KeyAnalyzer.ContainsAnyAsciiLetters("123".AsSpan()));
// note, must only pass ASCII to ContainsAnyLetters, anything else is a
// Debug.Assert and would not have been called in the actual implementation
}
Expand Down

0 comments on commit bbb97a7

Please sign in to comment.