From 34bf37a54ba5b4bdf03af2778e105f5ff82388a6 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 7 Jan 2022 03:37:49 +0300 Subject: [PATCH 01/39] Improve "lastChar == firstChar" case, also, use IndexOf directly if value.Length == 1 --- .../src/System/SpanHelpers.Char.cs | 170 +++++++++++++++++- 1 file changed, 168 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 0c13e743674148..a41481592fe4cb 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -22,12 +22,24 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char if (valueLength == 0) return 0; // A zero-length sequence is always treated as "found" at the start of the search space. + int index = 0; char valueHead = value; - ref char valueTail = ref Unsafe.Add(ref value, 1); int valueTailLength = valueLength - 1; + + if (valueTailLength == 0) + { + // for single-char values use plain IndexOf + return IndexOf(ref searchSpace, value, searchSpaceLength); + } + + // Avx2 implies Sse2 + if ((Sse2.IsSupported || AdvSimd.IsSupported) && + searchSpaceLength - valueTailLength >= Vector128.Count) + goto SEARCH_TWO_CHARS; + + ref char valueTail = ref Unsafe.Add(ref value, 1); int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; - int index = 0; while (remainingSearchSpaceLength > 0) { // Do a quick search for the first element of "value". 
@@ -54,6 +66,160 @@ ref Unsafe.As(ref valueTail), index++; } return -1; + + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + SEARCH_TWO_CHARS: + if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) + { + // Find the last unique (which is not equal to ch1) character + // the algorithm is fine if both are equal, just a little bit less efficient + ushort ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector256 ch1 = Vector256.Create((ushort)value); + Vector256 ch2 = Vector256.Create(ch2Val); + + NEXT_AVX: + uint mask = (uint)Avx2.MoveMask( + Avx2.And( + Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, index)), + Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance))).AsByte()); + + while (mask != 0) + { + // div by 2 (shr) because we work with 2-byte chars + int bitPos = (int)((uint)BitOperations.TrailingZeroCount(mask) / 2); + if (SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + bitPos)), + ref Unsafe.As(ref value), + (nuint)(uint)valueLength * 2)) + { + return index + bitPos; + } + mask &= mask - 1; + } + index += Vector256.Count; + + if (index + valueTailLength == searchSpaceLength) + return -1; + + // Overlap with the current chunk if there is not enough room for the next one + if (index + valueTailLength + Vector256.Count > searchSpaceLength) + index = searchSpaceLength - valueTailLength - Vector256.Count; + + goto NEXT_AVX; + } + else if (Sse2.IsSupported) + { + // Find the last unique (which is not equal to ch1) character + // the algorithm is fine if both are equal, just a little bit less efficient + ushort ch2Val = Unsafe.Add(ref value, valueTailLength); + 
int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create((ushort)value); + Vector128 ch2 = Vector128.Create(ch2Val); + + NEXT_SSE: + uint mask = (uint)Sse2.MoveMask( + Sse2.And( + Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, index)), + Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance))).AsByte()); + + while (mask != 0) + { + // div by 2 (shr) because we work with 2-byte chars + int bitPos = (int)((uint)BitOperations.TrailingZeroCount(mask) / 2); + if (SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + bitPos)), + ref Unsafe.As(ref value), + (nuint)(uint)valueLength * 2)) + { + return index + bitPos; + } + mask &= mask - 1; + } + index += Vector128.Count; + + if (index + valueTailLength == searchSpaceLength) + return -1; + + // Overlap with the current chunk if there is not enough room for the next one + if (index + valueTailLength + Vector128.Count > searchSpaceLength) + index = searchSpaceLength - valueTailLength - Vector128.Count; + + goto NEXT_SSE; + } + else if (AdvSimd.IsSupported) + { + // Find the last unique (which is not equal to ch1) character + // the algorithm is fine if both are equal, just a little bit less efficient + ushort ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create((ushort)value); + Vector128 ch2 = Vector128.Create(ch2Val); + + NEXT_ADVSIMD: + Vector128 bothEq = AdvSimd.And( + AdvSimd.CompareEqual(ch1, LoadVector128(ref searchSpace, index)), + AdvSimd.CompareEqual(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance))).AsUInt64(); + + ulong mask = AdvSimd.Extract(bothEq, 0); + if (mask != 0) + { + for (int i = 0; i < 4; i++) + { + if ((mask & 0xFFFF) != 0 && + SequenceEqual( + ref 
Unsafe.As(ref Unsafe.Add(ref searchSpace, index + i)), + ref Unsafe.As(ref value), + (nuint)(uint)valueLength * 2)) + { + return index + i; + } + mask >>= 16; + } + } + + // Inspect the second lane + mask = AdvSimd.Extract(bothEq, 1); + if (mask != 0) + { + for (int i = 0; i < 4; i++) + { + if ((mask & 0xFFFF) != 0 && + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + i + 4)), + ref Unsafe.As(ref value), + (nuint)(uint)valueTailLength * 2)) + { + return index + i + 4; + } + mask >>= 16; + } + } + + index += Vector128.Count; + + if (index + valueTailLength == searchSpaceLength) + return -1; + + // Overlap with the current chunk if there is not enough room for the next one + if (index + valueTailLength + Vector128.Count > searchSpaceLength) + index = searchSpaceLength - valueTailLength - Vector128.Count; + + goto NEXT_ADVSIMD; + } + + Debug.Fail("Unreachable"); + return -1; } [MethodImpl(MethodImplOptions.AggressiveOptimization)] From 5cfdb161a6eec79e17c53a6e4fa42e86f6aa69ed Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 7 Jan 2022 14:31:42 +0300 Subject: [PATCH 02/39] Try plain IndexOf first, to optimize cases where even first char of value is never met --- .../src/System/SpanHelpers.Char.cs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index a41481592fe4cb..014850209352be 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -32,11 +32,6 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char return IndexOf(ref searchSpace, value, searchSpaceLength); } - // Avx2 implies Sse2 - if ((Sse2.IsSupported || AdvSimd.IsSupported) && - searchSpaceLength - valueTailLength >= Vector128.Count) - goto SEARCH_TWO_CHARS; - ref char valueTail = ref 
Unsafe.Add(ref value, 1); int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; @@ -64,11 +59,19 @@ ref Unsafe.As(ref valueTail), remainingSearchSpaceLength--; index++; + + // Since we've just hit a false-positive let's switch to "Algorithm 1: Generic SIMD" + // as described in http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd + // Avx2 implies Sse2 + if ((Sse2.IsSupported || AdvSimd.IsSupported) && remainingSearchSpaceLength - valueTailLength >= Vector128.Count) + { + goto SEARCH_TWO_CHARS; + } } return -1; - // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła - // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 SEARCH_TWO_CHARS: if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) { From e36fdc6a87b50cdc5fe0694ffa95cfd7f3c6a05c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 7 Jan 2022 16:34:40 +0300 Subject: [PATCH 03/39] add 1-byte implementation --- .../src/System/SpanHelpers.Byte.cs | 153 +++++++++++++++++- 1 file changed, 152 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 05a52499925bfe..2cb484329988d5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -23,8 +23,15 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte return 0; // A zero-length sequence is always treated as "found" at the start of the search space. 
byte valueHead = value; - ref byte valueTail = ref Unsafe.Add(ref value, 1); int valueTailLength = valueLength - 1; + + if (valueTailLength == 0) + { + // for single-byte values use plain IndexOf + return IndexOf(ref searchSpace, value, searchSpaceLength); + } + + ref byte valueTail = ref Unsafe.Add(ref value, 1); int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; int offset = 0; @@ -47,7 +54,151 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte remainingSearchSpaceLength--; offset++; + + // Since we've just hit a false-positive let's switch to "Algorithm 1: Generic SIMD" + // as described in http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd + // Avx2 implies Sse2 + if ((Sse2.IsSupported || AdvSimd.IsSupported) && remainingSearchSpaceLength - valueTailLength >= Vector128.Count) + { + goto SEARCH_TWO_BYTES; + } + } + return -1; + + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + SEARCH_TWO_BYTES: + if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) + { + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector256 ch1 = Vector256.Create(value); + Vector256 ch2 = Vector256.Create(ch2Val); + + NEXT_AVX: + uint mask = (uint)Avx2.MoveMask( + Avx2.And( + Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), + Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + + while (mask != 0) + { + int bitPos = BitOperations.TrailingZeroCount(mask); + if 
(SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) + return offset + bitPos; + mask &= mask - 1; + } + offset += Vector256.Count; + + if (offset + valueTailLength == searchSpaceLength) + return -1; + + // Overlap with the current chunk if there is not enough room for the next one + if (offset + valueTailLength + Vector256.Count > searchSpaceLength) + offset = searchSpaceLength - valueTailLength - Vector256.Count; + + goto NEXT_AVX; } + else if (Sse2.IsSupported) + { + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create(value); + Vector128 ch2 = Vector128.Create(ch2Val); + + NEXT_SSE: + uint mask = (uint)Sse2.MoveMask( + Sse2.And( + Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), + Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + + while (mask != 0) + { + int bitPos = BitOperations.TrailingZeroCount(mask); + if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) + return offset + bitPos; + mask &= mask - 1; + } + offset += Vector128.Count; + + if (offset + valueTailLength == searchSpaceLength) + return -1; + + // Overlap with the current chunk if there is not enough room for the next one + if (offset + valueTailLength + Vector128.Count > searchSpaceLength) + offset = searchSpaceLength - valueTailLength - Vector128.Count; + + goto NEXT_SSE; + } + else if (AdvSimd.IsSupported) + { + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int 
ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create(value); + Vector128 ch2 = Vector128.Create(ch2Val); + + NEXT_ADVSIMD: + Vector128 bothEq = AdvSimd.And( + AdvSimd.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), + AdvSimd.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance)))).AsUInt64(); + + ulong mask = AdvSimd.Extract(bothEq, 0); + if (mask != 0) + { + for (int i = 0; i < 8; i++) + { + if ((mask & 0xFF) != 0 && + SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + i), ref value, (nuint)(uint)valueLength)) + { + return offset + i; + } + mask >>= 8; + } + } + + // Inspect the second lane + mask = AdvSimd.Extract(bothEq, 1); + if (mask != 0) + { + for (int i = 0; i < 8; i++) + { + if ((mask & 0xFF) != 0 && + SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + i + 8), ref value, (nuint)(uint)valueLength)) + { + return offset + i + 8; + } + mask >>= 8; + } + } + + offset += Vector128.Count; + + if (offset + valueTailLength == searchSpaceLength) + return -1; + + // Overlap with the current chunk if there is not enough room for the next one + if (offset + valueTailLength + Vector128.Count > searchSpaceLength) + offset = searchSpaceLength - valueTailLength - Vector128.Count; + + goto NEXT_ADVSIMD; + } + + Debug.Fail("Unreachable"); return -1; } From 85c232048e590b3f5cefd3ad407d5bf9c448104e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 7 Jan 2022 16:41:03 +0300 Subject: [PATCH 04/39] copyrights --- THIRD-PARTY-NOTICES.TXT | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/THIRD-PARTY-NOTICES.TXT b/THIRD-PARTY-NOTICES.TXT index e38f6ef907d430..55329a8b022944 100644 --- a/THIRD-PARTY-NOTICES.TXT +++ b/THIRD-PARTY-NOTICES.TXT @@ -697,6 +697,35 @@ License for fastmod (https://github.com/lemire/fastmod) and ibm-fpgen (https://g See the License for the specific language 
governing permissions and limitations under the License. +License for sse4-strstr (https://github.com/WojciechMula/sse4-strstr) +-------------------------------------- + + Copyright (c) 2008-2016, Wojciech Muła + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ License notice for The C++ REST SDK ----------------------------------- From 8918ab6e7a7cf8839f35ac419eca326bcfc16cec Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 7 Jan 2022 17:11:10 +0300 Subject: [PATCH 05/39] fix copy-paste mistake --- .../System.Private.CoreLib/src/System/SpanHelpers.Byte.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 2cb484329988d5..9977eaf6665c8a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -58,7 +58,7 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte // Since we've just hit a false-positive let's switch to "Algorithm 1: Generic SIMD" // as described in http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd // Avx2 implies Sse2 - if ((Sse2.IsSupported || AdvSimd.IsSupported) && remainingSearchSpaceLength - valueTailLength >= Vector128.Count) + if ((Sse2.IsSupported || AdvSimd.IsSupported) && remainingSearchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } From cb32d34c397594bfa33f903e88e1084b8ff50f4c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 8 Jan 2022 15:36:47 +0300 Subject: [PATCH 06/39] Initial LastIndexOf impl --- .../src/System/SpanHelpers.Byte.cs | 190 ++++++++++++++++-- .../src/System/SpanHelpers.Char.cs | 14 +- 2 files changed, 184 insertions(+), 20 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 9977eaf6665c8a..bb88a45bde3542 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -22,7 +22,6 @@ public static int IndexOf(ref byte searchSpace, int 
searchSpaceLength, ref byte if (valueLength == 0) return 0; // A zero-length sequence is always treated as "found" at the start of the search space. - byte valueHead = value; int valueTailLength = valueLength - 1; if (valueTailLength == 0) @@ -31,10 +30,17 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte return IndexOf(ref searchSpace, value, searchSpaceLength); } + int offset = 0; + // Avx2 implies Sse2 + if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) + { + goto SEARCH_TWO_BYTES; + } + + byte valueHead = value; ref byte valueTail = ref Unsafe.Add(ref value, 1); int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; - int offset = 0; while (remainingSearchSpaceLength > 0) { // Do a quick search for the first element of "value". @@ -54,14 +60,6 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte remainingSearchSpaceLength--; offset++; - - // Since we've just hit a false-positive let's switch to "Algorithm 1: Generic SIMD" - // as described in http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd - // Avx2 implies Sse2 - if ((Sse2.IsSupported || AdvSimd.IsSupported) && remainingSearchSpaceLength - valueTailLength >= Vector128.Count) - { - goto SEARCH_TWO_BYTES; - } } return -1; @@ -567,11 +565,23 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b if (valueLength == 0) return searchSpaceLength; // A zero-length sequence is always treated as "found" at the end of the search space. 
- byte valueHead = value; - ref byte valueTail = ref Unsafe.Add(ref value, 1); int valueTailLength = valueLength - 1; + if (valueTailLength == 0) + { + // for single-byte values use plain LastIndexOf + return LastIndexOf(ref searchSpace, value, searchSpaceLength); + } + int offset = 0; + if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) + { + goto SEARCH_TWO_BYTES; + } + + byte valueHead = value; + ref byte valueTail = ref Unsafe.Add(ref value, 1); + while (true) { Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". @@ -591,6 +601,162 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b offset += remainingSearchSpaceLength - relativeIndex; } return -1; + + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + SEARCH_TWO_BYTES: + if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) + { + offset = searchSpaceLength - valueTailLength - Vector256.Count; + + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector256 ch1 = Vector256.Create(value); + Vector256 ch2 = Vector256.Create(ch2Val); + + NEXT_AVX: + uint mask = (uint)Avx2.MoveMask( + Avx2.And( + Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), + Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + + // Unlike normal IndexOf, we need to find the last index even in the current chunk (there can 
be multiple results) + // Alternative solutions is to "reverse" the mask or use LZCNT? + int lastGoodPos = -1; + while (mask != 0) + { + int bitPos = BitOperations.TrailingZeroCount(mask); + if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) + lastGoodPos = bitPos + offset; + mask &= mask - 1; + } + if (lastGoodPos != -1) + return lastGoodPos; + + offset -= Vector256.Count; + if (offset == -Vector256.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + + goto NEXT_AVX; + } + else if (Sse2.IsSupported) + { + offset = searchSpaceLength - valueTailLength - Vector128.Count; + + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create(value); + Vector128 ch2 = Vector128.Create(ch2Val); + + NEXT_SSE: + uint mask = (uint)Sse2.MoveMask( + Sse2.And( + Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), + Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + + // Unlike normal IndexOf, we need to find the last index even in the current chunk (there can be multiple results) + // Alternative solutions is to "reverse" the mask or use LZCNT? 
+ int lastGoodPos = -1; + while (mask != 0) + { + int bitPos = BitOperations.TrailingZeroCount(mask); + if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) + lastGoodPos = bitPos + offset; + mask &= mask - 1; + } + if (lastGoodPos != -1) + return lastGoodPos; + + offset -= Vector128.Count; + if (offset == -Vector128.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + + goto NEXT_SSE; + } + else if (AdvSimd.IsSupported) + { + offset = searchSpaceLength - valueTailLength - Vector128.Count; + + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create(value); + Vector128 ch2 = Vector128.Create(ch2Val); + + NEXT_ADVSIMD: + Vector128 bothEq = AdvSimd.And( + AdvSimd.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), + AdvSimd.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance)))).AsUInt64(); + + ulong mask = AdvSimd.Extract(bothEq, 0); + int lastGoodPos = -1; + + if (mask != 0) + { + for (int i = 0; i < 8; i++) + { + if ((mask & 0xFF) != 0 && + SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + i), ref value, (nuint)(uint)valueLength)) + { + lastGoodPos = offset + i; + } + mask >>= 8; + } + } + + if (lastGoodPos != -1) + return lastGoodPos; + + // Inspect the second lane + mask = AdvSimd.Extract(bothEq, 1); + if (mask != 0) + { + for (int i = 0; i < 8; i++) + { + if ((mask & 0xFF) != 0 && + SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + i + 8), ref value, (nuint)(uint)valueLength)) + { + lastGoodPos = offset + i + 8; + } + mask >>= 8; + } + } + + if (lastGoodPos != -1) + 
return lastGoodPos; + + offset -= Vector128.Count; + if (offset == -Vector128.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + + goto NEXT_ADVSIMD; + } + + Debug.Fail("Unreachable"); + return -1; } [MethodImpl(MethodImplOptions.AggressiveOptimization)] diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 014850209352be..5f8d1181633b0b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -32,6 +32,12 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char return IndexOf(ref searchSpace, value, searchSpaceLength); } + // Avx2 implies Sse2 + if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) + { + goto SEARCH_TWO_CHARS; + } + ref char valueTail = ref Unsafe.Add(ref value, 1); int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; @@ -59,14 +65,6 @@ ref Unsafe.As(ref valueTail), remainingSearchSpaceLength--; index++; - - // Since we've just hit a false-positive let's switch to "Algorithm 1: Generic SIMD" - // as described in http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd - // Avx2 implies Sse2 - if ((Sse2.IsSupported || AdvSimd.IsSupported) && remainingSearchSpaceLength - valueTailLength >= Vector128.Count) - { - goto SEARCH_TWO_CHARS; - } } return -1; From cda6b50ac329a75f7d718798257b9d7a97f7faa2 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 8 Jan 2022 19:00:33 +0300 Subject: [PATCH 07/39] More efficient LastIndexOf --- .../src/System/SpanHelpers.Byte.cs | 91 ++----------------- 1 file changed, 9 insertions(+), 82 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs 
b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index bb88a45bde3542..8e52951e123ab7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -574,7 +574,7 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b } int offset = 0; - if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) + if ((Sse2.IsSupported/* || AdvSimd.IsSupported*/) && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } @@ -625,18 +625,14 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); - // Unlike normal IndexOf, we need to find the last index even in the current chunk (there can be multiple results) - // Alternative solutions is to "reverse" the mask or use LZCNT? - int lastGoodPos = -1; while (mask != 0) { - int bitPos = BitOperations.TrailingZeroCount(mask); + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 31 - BitOperations.LeadingZeroCount(mask); if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) - lastGoodPos = bitPos + offset; - mask &= mask - 1; + return bitPos + offset; + mask &= ~(uint)(1 << bitPos); // clear the highest set bit. 
} - if (lastGoodPos != -1) - return lastGoodPos; offset -= Vector256.Count; if (offset == -Vector256.Count) @@ -667,18 +663,14 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); - // Unlike normal IndexOf, we need to find the last index even in the current chunk (there can be multiple results) - // Alternative solutions is to "reverse" the mask or use LZCNT? - int lastGoodPos = -1; while (mask != 0) { - int bitPos = BitOperations.TrailingZeroCount(mask); + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 31 - BitOperations.LeadingZeroCount(mask); if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) - lastGoodPos = bitPos + offset; - mask &= mask - 1; + return bitPos + offset; + mask &= ~(uint)(1 << bitPos); // clear the highest set bit. 
} - if (lastGoodPos != -1) - return lastGoodPos; offset -= Vector128.Count; if (offset == -Vector128.Count) @@ -689,71 +681,6 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b goto NEXT_SSE; } - else if (AdvSimd.IsSupported) - { - offset = searchSpaceLength - valueTailLength - Vector128.Count; - - // Find the last unique (which is not equal to ch1) byte - // the algorithm is fine if both are equal, just a little bit less efficient - byte ch2Val = Unsafe.Add(ref value, valueTailLength); - int ch1ch2Distance = valueTailLength; - while (ch2Val == value && ch1ch2Distance > 1) - ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); - - Vector128 ch1 = Vector128.Create(value); - Vector128 ch2 = Vector128.Create(ch2Val); - - NEXT_ADVSIMD: - Vector128 bothEq = AdvSimd.And( - AdvSimd.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), - AdvSimd.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance)))).AsUInt64(); - - ulong mask = AdvSimd.Extract(bothEq, 0); - int lastGoodPos = -1; - - if (mask != 0) - { - for (int i = 0; i < 8; i++) - { - if ((mask & 0xFF) != 0 && - SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + i), ref value, (nuint)(uint)valueLength)) - { - lastGoodPos = offset + i; - } - mask >>= 8; - } - } - - if (lastGoodPos != -1) - return lastGoodPos; - - // Inspect the second lane - mask = AdvSimd.Extract(bothEq, 1); - if (mask != 0) - { - for (int i = 0; i < 8; i++) - { - if ((mask & 0xFF) != 0 && - SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + i + 8), ref value, (nuint)(uint)valueLength)) - { - lastGoodPos = offset + i + 8; - } - mask >>= 8; - } - } - - if (lastGoodPos != -1) - return lastGoodPos; - - offset -= Vector128.Count; - if (offset == -Vector128.Count) - return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (offset < 0) - offset = 0; - - goto NEXT_ADVSIMD; - } Debug.Fail("Unreachable"); return -1; From 
8af9270480aff6cb9f320d7e07f5a63862380701 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 8 Jan 2022 19:31:22 +0300 Subject: [PATCH 08/39] fix bug in Char version (we need to clear two lowest bits in the mask) & temporarily remove AdvSimd impl --- .../src/System/SpanHelpers.Byte.cs | 63 +-------------- .../src/System/SpanHelpers.Char.cs | 79 +++---------------- 2 files changed, 17 insertions(+), 125 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 8e52951e123ab7..1146156738ee2f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -32,7 +32,7 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte int offset = 0; // Avx2 implies Sse2 - if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) + if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } @@ -138,63 +138,6 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte goto NEXT_SSE; } - else if (AdvSimd.IsSupported) - { - // Find the last unique (which is not equal to ch1) byte - // the algorithm is fine if both are equal, just a little bit less efficient - byte ch2Val = Unsafe.Add(ref value, valueTailLength); - int ch1ch2Distance = valueTailLength; - while (ch2Val == value && ch1ch2Distance > 1) - ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); - - Vector128 ch1 = Vector128.Create(value); - Vector128 ch2 = Vector128.Create(ch2Val); - - NEXT_ADVSIMD: - Vector128 bothEq = AdvSimd.And( - AdvSimd.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), - AdvSimd.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance)))).AsUInt64(); - - ulong mask = AdvSimd.Extract(bothEq, 0); - if (mask != 0) - { - for (int
i = 0; i < 8; i++) - { - if ((mask & 0xFF) != 0 && - SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + i), ref value, (nuint)(uint)valueLength)) - { - return offset + i; - } - mask >>= 8; - } - } - - // Inspect the second lane - mask = AdvSimd.Extract(bothEq, 1); - if (mask != 0) - { - for (int i = 0; i < 8; i++) - { - if ((mask & 0xFF) != 0 && - SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + i + 8), ref value, (nuint)(uint)valueLength)) - { - return offset + i + 8; - } - mask >>= 8; - } - } - - offset += Vector128.Count; - - if (offset + valueTailLength == searchSpaceLength) - return -1; - - // Overlap with the current chunk if there is not enough room for the next one - if (offset + valueTailLength + Vector128.Count > searchSpaceLength) - offset = searchSpaceLength - valueTailLength - Vector128.Count; - - goto NEXT_ADVSIMD; - } Debug.Fail("Unreachable"); return -1; @@ -574,7 +517,9 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b } int offset = 0; - if ((Sse2.IsSupported/* || AdvSimd.IsSupported*/) && searchSpaceLength - valueTailLength >= Vector128.Count) + + // Avx2 implies Sse2 + if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 5f8d1181633b0b..7226e1ca0c8693 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -33,7 +33,7 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char } // Avx2 implies Sse2 - if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) + if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_CHARS; } @@ -100,7 +100,12 @@ ref Unsafe.As(ref value), { return index 
+ bitPos; } - mask &= mask - 1; + + // Clear two lowest set bits + if (Bmi1.IsSupported) + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + else + mask &= ~(uint)(0b11 << bitPos); } index += Vector256.Count; @@ -142,71 +147,13 @@ ref Unsafe.As(ref value), { return index + bitPos; } - mask &= mask - 1; - } - index += Vector128.Count; - - if (index + valueTailLength == searchSpaceLength) - return -1; - - // Overlap with the current chunk if there is not enough room for the next one - if (index + valueTailLength + Vector128.Count > searchSpaceLength) - index = searchSpaceLength - valueTailLength - Vector128.Count; - - goto NEXT_SSE; - } - else if (AdvSimd.IsSupported) - { - // Find the last unique (which is not equal to ch1) character - // the algorithm is fine if both are equal, just a little bit less efficient - ushort ch2Val = Unsafe.Add(ref value, valueTailLength); - int ch1ch2Distance = valueTailLength; - while (ch2Val == value && ch1ch2Distance > 1) - ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); - - Vector128 ch1 = Vector128.Create((ushort)value); - Vector128 ch2 = Vector128.Create(ch2Val); - - NEXT_ADVSIMD: - Vector128 bothEq = AdvSimd.And( - AdvSimd.CompareEqual(ch1, LoadVector128(ref searchSpace, index)), - AdvSimd.CompareEqual(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance))).AsUInt64(); - - ulong mask = AdvSimd.Extract(bothEq, 0); - if (mask != 0) - { - for (int i = 0; i < 4; i++) - { - if ((mask & 0xFFFF) != 0 && - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + i)), - ref Unsafe.As(ref value), - (nuint)(uint)valueLength * 2)) - { - return index + i; - } - mask >>= 16; - } - } - // Inspect the second lane - mask = AdvSimd.Extract(bothEq, 1); - if (mask != 0) - { - for (int i = 0; i < 4; i++) - { - if ((mask & 0xFFFF) != 0 && - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + i + 4)), - ref Unsafe.As(ref value), - (nuint)(uint)valueTailLength * 2)) - { - return index + i + 4; - } - 
mask >>= 16; - } + // Clear two lowest set bits + if (Bmi1.IsSupported) + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + else + mask &= ~(uint)(0b11 << bitPos); } - index += Vector128.Count; if (index + valueTailLength == searchSpaceLength) @@ -216,7 +163,7 @@ ref Unsafe.As(ref value), if (index + valueTailLength + Vector128.Count > searchSpaceLength) index = searchSpaceLength - valueTailLength - Vector128.Count; - goto NEXT_ADVSIMD; + goto NEXT_SSE; } Debug.Fail("Unreachable"); From 87c26d01e23120dc35abc1da3b37336423bbb86e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 8 Jan 2022 19:35:40 +0300 Subject: [PATCH 09/39] use ResetLowestSetBit --- .../System.Private.CoreLib/src/System/SpanHelpers.Byte.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 1146156738ee2f..63e79acbfebc80 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -89,7 +89,8 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte int bitPos = BitOperations.TrailingZeroCount(mask); if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) return offset + bitPos; - mask &= mask - 1; + + mask = Bmi1.IsSupported ? Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); } offset += Vector256.Count; @@ -125,7 +126,8 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte int bitPos = BitOperations.TrailingZeroCount(mask); if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) return offset + bitPos; - mask &= mask - 1; + + mask = Bmi1.IsSupported ? 
Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); } offset += Vector128.Count; From 652b42d6197b974320687b81ef4e3623fadc12b2 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 9 Jan 2022 01:01:19 +0300 Subject: [PATCH 10/39] Fix bug --- .../src/System/SpanHelpers.Char.cs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 7226e1ca0c8693..99b1a9fa2feadf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -91,14 +91,15 @@ ref Unsafe.As(ref valueTail), while (mask != 0) { + int bitPos = BitOperations.TrailingZeroCount(mask); // div by 2 (shr) because we work with 2-byte chars - int bitPos = (int)((uint)BitOperations.TrailingZeroCount(mask) / 2); + int charPos = (int)((uint)bitPos / 2); if (SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + bitPos)), + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { - return index + bitPos; + return index + charPos; } // Clear two lowest set bits @@ -138,14 +139,15 @@ ref Unsafe.As(ref value), while (mask != 0) { + int bitPos = BitOperations.TrailingZeroCount(mask); // div by 2 (shr) because we work with 2-byte chars - int bitPos = (int)((uint)BitOperations.TrailingZeroCount(mask) / 2); + int charPos = (int)((uint)bitPos / 2); if (SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + bitPos)), + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { - return index + bitPos; + return index + charPos; } // Clear two lowest set bits From 53cefad22a6302f002aa14dee2bb562df4facb45 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 9 Jan 2022 01:53:54 +0300 Subject: [PATCH 11/39] Add two-byte 
LastIndexOf --- .../src/System/SpanHelpers.Char.cs | 154 ++++++++++++++++++ .../src/System/SpanHelpers.T.cs | 18 +- 2 files changed, 170 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 99b1a9fa2feadf..32e7811a2e5b23 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -172,6 +172,160 @@ ref Unsafe.As(ref value), return -1; } + public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref char value, int valueLength) + { + Debug.Assert(searchSpaceLength >= 0); + Debug.Assert(valueLength >= 0); + + if (valueLength == 0) + return searchSpaceLength; // A zero-length sequence is always treated as "found" at the end of the search space. + + int valueTailLength = valueLength - 1; + + if (valueTailLength == 0) + { + // for single-char values use plain LastIndexOf + return LastIndexOf(ref searchSpace, value, searchSpaceLength); + } + + int offset = 0; + + // Avx2 implies Sse2 + if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) + { + goto SEARCH_TWO_CHARS; + } + + char valueHead = value; + ref char valueTail = ref Unsafe.Add(ref value, 1); + + while (true) + { + Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". + int remainingSearchSpaceLength = searchSpaceLength - offset - valueTailLength; + if (remainingSearchSpaceLength <= 0) + break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. + + // Do a quick search for the first element of "value". + int relativeIndex = LastIndexOf(ref searchSpace, valueHead, remainingSearchSpaceLength); + if (relativeIndex == -1) + break; + + // Found the first element of "value". See if the tail matches. 
+ if (SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), + ref Unsafe.As(ref valueTail), + (nuint)(uint)valueTailLength * 2)) // The (nunit)-cast is necessary to pick the correct overload + { + return relativeIndex; // The tail matched. Return a successful find. + } + + offset += remainingSearchSpaceLength - relativeIndex; + } + return -1; + + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + SEARCH_TWO_CHARS: + if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) + { + offset = searchSpaceLength - valueTailLength - Vector256.Count; + + // Find the last unique (which is not equal to ch1) char + // the algorithm is fine if both are equal, just a little bit less efficient + char ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector256 ch1 = Vector256.Create((ushort)value); + Vector256 ch2 = Vector256.Create((ushort)ch2Val); + + NEXT_AVX: + uint mask = (uint)Avx2.MoveMask( + Avx2.And( + Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), + Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance)))).AsByte()); + + while (mask != 0) + { + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 31 - BitOperations.LeadingZeroCount(mask); + int charPos = (int)((uint)bitPos / 2); + + if (SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), + ref Unsafe.As(ref value), + (nuint)(uint)valueLength * 2)) + { + return charPos + offset; + } + + Debug.Assert(bitPos > 0); + mask &= ~(uint)(0b11 << (bitPos - 1)); // clear two highest set bits. 
+ } + + offset -= Vector256.Count; + if (offset == -Vector256.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + + goto NEXT_AVX; + } + else if (Sse2.IsSupported) + { + offset = searchSpaceLength - valueTailLength - Vector128.Count; + + // Find the last unique (which is not equal to ch1) char + // the algorithm is fine if both are equal, just a little bit less efficient + char ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create((ushort)value); + Vector128 ch2 = Vector128.Create((ushort)ch2Val); + + NEXT_SSE: + uint mask = (uint)Sse2.MoveMask( + Sse2.And( + Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), + Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance)))).AsByte()); + + while (mask != 0) + { + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 31 - BitOperations.LeadingZeroCount(mask); + int charPos = (int)((uint)bitPos / 2); + + if (SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), + ref Unsafe.As(ref value), + (nuint)(uint)valueLength * 2)) + { + return charPos + offset; + } + + Debug.Assert(bitPos > 0); + mask &= ~(uint)(0b11 << (bitPos - 1)); // clear two highest set bits. 
+ } + + offset -= Vector128.Count; + if (offset == -Vector128.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + + goto NEXT_SSE; + } + + Debug.Fail("Unreachable"); + return -1; + } + [MethodImpl(MethodImplOptions.AggressiveOptimization)] public static unsafe int SequenceCompareTo(ref char first, int firstLength, ref char second, int secondLength) { diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index 684bce9d96f2fd..61185c64b1104c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -774,11 +774,25 @@ public static int LastIndexOf(ref T searchSpace, int searchSpaceLength, ref T if (valueLength == 0) return searchSpaceLength; // A zero-length sequence is always treated as "found" at the end of the search space. - T valueHead = value; - ref T valueTail = ref Unsafe.Add(ref value, 1); + if (RuntimeHelpers.IsBitwiseEquatable()) + { + if (Unsafe.SizeOf() == 1) + return LastIndexOf(ref Unsafe.As(ref searchSpace), searchSpaceLength, ref Unsafe.As(ref value), valueLength); + if (Unsafe.SizeOf() == 2) + return LastIndexOf(ref Unsafe.As(ref searchSpace), searchSpaceLength, ref Unsafe.As(ref value), valueLength); + } + int valueTailLength = valueLength - 1; + if (valueTailLength == 0) + { + return LastIndexOf(ref searchSpace, value, searchSpaceLength); + } int index = 0; + + T valueHead = value; + ref T valueTail = ref Unsafe.Add(ref value, 1); + while (true) { Debug.Assert(0 <= index && index <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". 
From d46540745397892335c9f39e18c6018b98e198d1 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 9 Jan 2022 02:10:48 +0300 Subject: [PATCH 12/39] Fix build --- .../src/System/SpanHelpers.Char.cs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 32e7811a2e5b23..73b8c95ecfd117 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -250,7 +250,7 @@ ref Unsafe.As(ref valueTail), while (mask != 0) { // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 31 - BitOperations.LeadingZeroCount(mask); + int bitPos = 30 - BitOperations.LeadingZeroCount(mask); int charPos = (int)((uint)bitPos / 2); if (SequenceEqual( @@ -261,8 +261,7 @@ ref Unsafe.As(ref value), return charPos + offset; } - Debug.Assert(bitPos > 0); - mask &= ~(uint)(0b11 << (bitPos - 1)); // clear two highest set bits. + mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. } offset -= Vector256.Count; @@ -297,7 +296,7 @@ ref Unsafe.As(ref value), while (mask != 0) { // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 31 - BitOperations.LeadingZeroCount(mask); + int bitPos = 30 - BitOperations.LeadingZeroCount(mask); int charPos = (int)((uint)bitPos / 2); if (SequenceEqual( @@ -308,8 +307,7 @@ ref Unsafe.As(ref value), return charPos + offset; } - Debug.Assert(bitPos > 0); - mask &= ~(uint)(0b11 << (bitPos - 1)); // clear two highest set bits. + mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. 
} offset -= Vector128.Count; From 2c851bc0b0f07ed8b81410fedf46cb21e0617466 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 9 Jan 2022 15:09:09 +0300 Subject: [PATCH 13/39] Minor optimizations --- .../src/System/SpanHelpers.Byte.cs | 54 +++++++++++++---- .../src/System/SpanHelpers.Char.cs | 60 ++++++++++--------- 2 files changed, 73 insertions(+), 41 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 63e79acbfebc80..82882913e3707e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -30,15 +30,17 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte return IndexOf(ref searchSpace, value, searchSpaceLength); } + byte valueHead = value; + ref byte valueTail = ref Unsafe.Add(ref value, 1); int offset = 0; + nuint valueTailNLength = (nuint)(uint)valueTailLength; + // Avx2 implies Sse2 if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } - byte valueHead = value; - ref byte valueTail = ref Unsafe.Add(ref value, 1); int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; while (remainingSearchSpaceLength > 0) @@ -55,7 +57,7 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. // Found the first element of "value". See if the tail matches. - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + 1), ref valueTail, (nuint)valueTailLength)) // The (nuint)-cast is necessary to pick the correct overload + if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + 1), ref valueTail, valueTailNLength)) // The (nuint)-cast is necessary to pick the correct overload return offset; // The tail matched. Return a successful find. 
remainingSearchSpaceLength--; @@ -87,9 +89,15 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte while (mask != 0) { int bitPos = BitOperations.TrailingZeroCount(mask); - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) + if (SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { return offset + bitPos; + } + // Clear lowest set bit mask = Bmi1.IsSupported ? Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); } offset += Vector256.Count; @@ -124,9 +132,15 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte while (mask != 0) { int bitPos = BitOperations.TrailingZeroCount(mask); - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) + if (SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { return offset + bitPos; + } + // Clear lowest set bit mask = Bmi1.IsSupported ? Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); } offset += Vector128.Count; @@ -518,7 +532,10 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b return LastIndexOf(ref searchSpace, value, searchSpaceLength); } + byte valueHead = value; + ref byte valueTail = ref Unsafe.Add(ref value, 1); int offset = 0; + nuint valueTailNLength = (nuint)(uint)valueTailLength; // Avx2 implies Sse2 if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) @@ -526,9 +543,6 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b goto SEARCH_TWO_BYTES; } - byte valueHead = value; - ref byte valueTail = ref Unsafe.Add(ref value, 1); - while (true) { Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". 
@@ -542,7 +556,7 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b break; // Found the first element of "value". See if the tail matches. - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, relativeIndex + 1), ref valueTail, (nuint)(uint)valueTailLength)) // The (nunit)-cast is necessary to pick the correct overload + if (SequenceEqual(ref Unsafe.Add(ref searchSpace, relativeIndex + 1), ref valueTail, valueTailNLength)) // The (nunit)-cast is necessary to pick the correct overload return relativeIndex; // The tail matched. Return a successful find. offset += remainingSearchSpaceLength - relativeIndex; @@ -576,9 +590,16 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) + if (SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { return bitPos + offset; - mask &= ~(uint)(1 << bitPos); // clear the highest set bit. + } + + // Clear the highest set bit. + mask &= ~(uint)(1 << bitPos); } offset -= Vector256.Count; @@ -614,9 +635,16 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) + if (SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { return bitPos + offset; - mask &= ~(uint)(1 << bitPos); // clear the highest set bit. + } + + // Clear the highest set bit. 
+ mask &= ~(uint)(1 << bitPos); } offset -= Vector128.Count; diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 73b8c95ecfd117..a2ac6005f7d172 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -23,7 +23,6 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char return 0; // A zero-length sequence is always treated as "found" at the start of the search space. int index = 0; - char valueHead = value; int valueTailLength = valueLength - 1; if (valueTailLength == 0) @@ -32,13 +31,16 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char return IndexOf(ref searchSpace, value, searchSpaceLength); } + char valueHead = value; + ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); + nuint valueTailByteLength = (nuint)(uint)valueTailLength * 2; + // Avx2 implies Sse2 if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_CHARS; } - ref char valueTail = ref Unsafe.Add(ref value, 1); int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; while (remainingSearchSpaceLength > 0) @@ -56,9 +58,9 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char // Found the first element of "value". See if the tail matches. if (SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), - ref Unsafe.As(ref valueTail), - (nuint)(uint)valueTailLength * 2)) + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), + ref valueTail, + valueTailByteLength)) { return index; // The tail matched. Return a successful find. 
} @@ -77,10 +79,10 @@ ref Unsafe.As(ref valueTail), // the algorithm is fine if both are equal, just a little bit less efficient ushort ch2Val = Unsafe.Add(ref value, valueTailLength); int ch1ch2Distance = valueTailLength; - while (ch2Val == value && ch1ch2Distance > 1) + while (ch2Val == valueHead && ch1ch2Distance > 1) ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); - Vector256 ch1 = Vector256.Create((ushort)value); + Vector256 ch1 = Vector256.Create((ushort)valueHead); Vector256 ch2 = Vector256.Create(ch2Val); NEXT_AVX: @@ -95,9 +97,9 @@ ref Unsafe.As(ref valueTail), // div by 2 (shr) because we work with 2-byte chars int charPos = (int)((uint)bitPos / 2); if (SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), - (nuint)(uint)valueLength * 2)) + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), + ref valueTail, + valueTailByteLength)) { return index + charPos; } @@ -125,10 +127,10 @@ ref Unsafe.As(ref value), // the algorithm is fine if both are equal, just a little bit less efficient ushort ch2Val = Unsafe.Add(ref value, valueTailLength); int ch1ch2Distance = valueTailLength; - while (ch2Val == value && ch1ch2Distance > 1) + while (ch2Val == valueHead && ch1ch2Distance > 1) ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); - Vector128 ch1 = Vector128.Create((ushort)value); + Vector128 ch1 = Vector128.Create((ushort)valueHead); Vector128 ch2 = Vector128.Create(ch2Val); NEXT_SSE: @@ -143,9 +145,9 @@ ref Unsafe.As(ref value), // div by 2 (shr) because we work with 2-byte chars int charPos = (int)((uint)bitPos / 2); if (SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), - (nuint)(uint)valueLength * 2)) + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), + ref valueTail, + valueTailByteLength)) { return index + charPos; } @@ -190,15 +192,16 @@ public static int LastIndexOf(ref char searchSpace, int 
searchSpaceLength, ref c int offset = 0; + char valueHead = value; + ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); + nuint valueTailByteLength = (nuint)(uint)valueTailLength * 2; + // Avx2 implies Sse2 if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_CHARS; } - char valueHead = value; - ref char valueTail = ref Unsafe.Add(ref value, 1); - while (true) { Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". @@ -214,8 +217,8 @@ public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref c // Found the first element of "value". See if the tail matches. if (SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), - ref Unsafe.As(ref valueTail), - (nuint)(uint)valueTailLength * 2)) // The (nunit)-cast is necessary to pick the correct overload + ref valueTail, + valueTailByteLength)) // The (nunit)-cast is necessary to pick the correct overload { return relativeIndex; // The tail matched. Return a successful find. 
} @@ -227,6 +230,7 @@ ref Unsafe.As(ref valueTail), // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 SEARCH_TWO_CHARS: + if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) { offset = searchSpaceLength - valueTailLength - Vector256.Count; @@ -235,10 +239,10 @@ ref Unsafe.As(ref valueTail), // the algorithm is fine if both are equal, just a little bit less efficient char ch2Val = Unsafe.Add(ref value, valueTailLength); int ch1ch2Distance = valueTailLength; - while (ch2Val == value && ch1ch2Distance > 1) + while (ch2Val == valueHead && ch1ch2Distance > 1) ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); - Vector256 ch1 = Vector256.Create((ushort)value); + Vector256 ch1 = Vector256.Create((ushort)valueHead); Vector256 ch2 = Vector256.Create((ushort)ch2Val); NEXT_AVX: @@ -254,9 +258,9 @@ ref Unsafe.As(ref valueTail), int charPos = (int)((uint)bitPos / 2); if (SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), - ref Unsafe.As(ref value), - (nuint)(uint)valueLength * 2)) + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), + ref valueTail, + valueTailByteLength)) { return charPos + offset; } @@ -300,9 +304,9 @@ ref Unsafe.As(ref value), int charPos = (int)((uint)bitPos / 2); if (SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), - ref Unsafe.As(ref value), - (nuint)(uint)valueLength * 2)) + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), + ref valueTail, + valueTailByteLength)) { return charPos + offset; } From 9308d824d7b0774fe885783c798b35e4967f4ea7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 9 Jan 2022 15:20:19 +0300 Subject: [PATCH 14/39] optimize cases with two-byte/two-char values --- .../src/System/SpanHelpers.Byte.cs | 12 ++++++++---- 
.../src/System/SpanHelpers.Char.cs | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 82882913e3707e..a21003b15730ee 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -89,7 +89,8 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte while (mask != 0) { int bitPos = BitOperations.TrailingZeroCount(mask); - if (SequenceEqual( + if (valueTailNLength == 2 || // we already matched two bytes + SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), ref valueTail, valueTailNLength)) @@ -132,7 +133,8 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), while (mask != 0) { int bitPos = BitOperations.TrailingZeroCount(mask); - if (SequenceEqual( + if (valueTailNLength == 2 || // we already matched two bytes + SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), ref valueTail, valueTailNLength)) @@ -590,7 +592,8 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (SequenceEqual( + if (valueTailNLength == 2 || // we already matched two bytes + SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), ref valueTail, valueTailNLength)) @@ -635,7 +638,8 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (SequenceEqual( + if (valueTailNLength == 2 || // we already matched two bytes + SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), ref valueTail, valueTailNLength)) diff --git 
a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index a2ac6005f7d172..62b7883552e2b3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -96,7 +96,8 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), int bitPos = BitOperations.TrailingZeroCount(mask); // div by 2 (shr) because we work with 2-byte chars int charPos = (int)((uint)bitPos / 2); - if (SequenceEqual( + if (valueTailByteLength == 2 || // we already matched two chars + SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), ref valueTail, valueTailByteLength)) @@ -144,7 +145,8 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), int bitPos = BitOperations.TrailingZeroCount(mask); // div by 2 (shr) because we work with 2-byte chars int charPos = (int)((uint)bitPos / 2); - if (SequenceEqual( + if (valueTailByteLength == 2 || // we already matched two chars + SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), ref valueTail, valueTailByteLength)) @@ -257,7 +259,8 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), int bitPos = 30 - BitOperations.LeadingZeroCount(mask); int charPos = (int)((uint)bitPos / 2); - if (SequenceEqual( + if (valueTailByteLength == 2 || // we already matched two chars + SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), ref valueTail, valueTailByteLength)) @@ -303,7 +306,8 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) int bitPos = 30 - BitOperations.LeadingZeroCount(mask); int charPos = (int)((uint)bitPos / 2); - if (SequenceEqual( + if (valueTailByteLength == 2 || // we already matched two chars + SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), ref valueTail, valueTailByteLength)) From 
dac974af8451d6e448f2b2297dfa1a205176d85d Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 9 Jan 2022 16:00:12 +0300 Subject: [PATCH 15/39] Remove gotos, fix build --- .../src/System/SpanHelpers.Byte.cs | 211 +++++++-------- .../src/System/SpanHelpers.Char.cs | 240 +++++++++--------- 2 files changed, 234 insertions(+), 217 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index a21003b15730ee..bbe14a1bd433dd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -80,37 +80,40 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte Vector256 ch1 = Vector256.Create(value); Vector256 ch2 = Vector256.Create(ch2Val); - NEXT_AVX: - uint mask = (uint)Avx2.MoveMask( - Avx2.And( - Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), - Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); - - while (mask != 0) + do { - int bitPos = BitOperations.TrailingZeroCount(mask); - if (valueTailNLength == 2 || // we already matched two bytes - SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + uint mask = (uint) + Avx2.MoveMask( + Avx2.And( + Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), + Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + + while (mask != 0) { - return offset + bitPos; + int bitPos = BitOperations.TrailingZeroCount(mask); + if (valueTailNLength == 1 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { + return offset + bitPos; + } + + // Clear lowest set bit + mask = Bmi1.IsSupported ? 
Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); } - // Clear lowest set bit - mask = Bmi1.IsSupported ? Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); - } - offset += Vector256.Count; + offset += Vector256.Count; - if (offset + valueTailLength == searchSpaceLength) - return -1; + if (offset + valueTailLength == searchSpaceLength) + return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (offset + valueTailLength + Vector256.Count > searchSpaceLength) - offset = searchSpaceLength - valueTailLength - Vector256.Count; + // Overlap with the current chunk if there is not enough room for the next one + if (offset + valueTailLength + Vector256.Count > searchSpaceLength) + offset = searchSpaceLength - valueTailLength - Vector256.Count; - goto NEXT_AVX; + } while (true); } else if (Sse2.IsSupported) { @@ -124,37 +127,39 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), Vector128 ch1 = Vector128.Create(value); Vector128 ch2 = Vector128.Create(ch2Val); - NEXT_SSE: - uint mask = (uint)Sse2.MoveMask( - Sse2.And( - Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), - Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); - - while (mask != 0) + do { - int bitPos = BitOperations.TrailingZeroCount(mask); - if (valueTailNLength == 2 || // we already matched two bytes - SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + uint mask = (uint) + Sse2.MoveMask( + Sse2.And( + Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), + Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + + while (mask != 0) { - return offset + bitPos; - } + int bitPos = BitOperations.TrailingZeroCount(mask); + if (valueTailNLength == 1 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { + 
return offset + bitPos; + } - // Clear lowest set bit - mask = Bmi1.IsSupported ? Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); - } - offset += Vector128.Count; + // Clear lowest set bit + mask = Bmi1.IsSupported ? Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); + } + offset += Vector128.Count; - if (offset + valueTailLength == searchSpaceLength) - return -1; + if (offset + valueTailLength == searchSpaceLength) + return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (offset + valueTailLength + Vector128.Count > searchSpaceLength) - offset = searchSpaceLength - valueTailLength - Vector128.Count; + // Overlap with the current chunk if there is not enough room for the next one + if (offset + valueTailLength + Vector128.Count > searchSpaceLength) + offset = searchSpaceLength - valueTailLength - Vector128.Count; - goto NEXT_SSE; + } while (true); } Debug.Fail("Unreachable"); @@ -582,37 +587,39 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b Vector256 ch1 = Vector256.Create(value); Vector256 ch2 = Vector256.Create(ch2Val); - NEXT_AVX: - uint mask = (uint)Avx2.MoveMask( - Avx2.And( - Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), - Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); - - while (mask != 0) + do { - // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (valueTailNLength == 2 || // we already matched two bytes - SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + uint mask = (uint) + Avx2.MoveMask( + Avx2.And( + Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), + Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + + while (mask != 0) { - return bitPos + offset; - } + // unlike IndexOf, here we use LZCNT to 
process matches starting from the end + int bitPos = 31 - BitOperations.LeadingZeroCount(mask); + if (valueTailNLength == 2 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { + return bitPos + offset; + } - // Clear the highest set bit. - mask &= ~(uint)(1 << bitPos); - } + // Clear the highest set bit. + mask &= ~(uint)(1 << bitPos); + } - offset -= Vector256.Count; - if (offset == -Vector256.Count) - return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (offset < 0) - offset = 0; + offset -= Vector256.Count; + if (offset == -Vector256.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; - goto NEXT_AVX; + } while (true); } else if (Sse2.IsSupported) { @@ -628,37 +635,39 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), Vector128 ch1 = Vector128.Create(value); Vector128 ch2 = Vector128.Create(ch2Val); - NEXT_SSE: - uint mask = (uint)Sse2.MoveMask( - Sse2.And( - Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), - Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); - - while (mask != 0) + do { - // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (valueTailNLength == 2 || // we already matched two bytes - SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + uint mask = (uint) + Sse2.MoveMask( + Sse2.And( + Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), + Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + + while (mask != 0) { - return bitPos + offset; - } + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 31 - 
BitOperations.LeadingZeroCount(mask); + if (valueTailNLength == 2 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { + return bitPos + offset; + } - // Clear the highest set bit. - mask &= ~(uint)(1 << bitPos); - } + // Clear the highest set bit. + mask &= ~(uint)(1 << bitPos); + } - offset -= Vector128.Count; - if (offset == -Vector128.Count) - return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (offset < 0) - offset = 0; + offset -= Vector128.Count; + if (offset == -Vector128.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; - goto NEXT_SSE; + } while (true); } Debug.Fail("Unreachable"); diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 62b7883552e2b3..eb97b7028c04c8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -85,44 +85,47 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), Vector256 ch1 = Vector256.Create((ushort)valueHead); Vector256 ch2 = Vector256.Create(ch2Val); - NEXT_AVX: - uint mask = (uint)Avx2.MoveMask( - Avx2.And( - Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, index)), - Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance))).AsByte()); - - while (mask != 0) + do { - int bitPos = BitOperations.TrailingZeroCount(mask); - // div by 2 (shr) because we work with 2-byte chars - int charPos = (int)((uint)bitPos / 2); - if (valueTailByteLength == 2 || // we already matched two chars - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), - ref valueTail, - valueTailByteLength)) + uint mask = (uint) + Avx2.MoveMask( + Avx2.And( + 
Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, index)), + Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance))).AsByte()); + + while (mask != 0) { - return index + charPos; + int bitPos = BitOperations.TrailingZeroCount(mask); + // div by 2 (shr) because we work with 2-byte chars + int charPos = (int)((uint)bitPos / 2); + if (valueTailByteLength == 1 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), + ref valueTail, + valueTailByteLength)) + { + return index + charPos; + } + + // Clear two lowest set bits + if (Bmi1.IsSupported) + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + else + mask &= ~(uint)(0b11 << bitPos); } - // Clear two lowest set bits - if (Bmi1.IsSupported) - mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); - else - mask &= ~(uint)(0b11 << bitPos); - } - index += Vector256.Count; + index += Vector256.Count; - if (index + valueTailLength == searchSpaceLength) - return -1; + if (index + valueTailLength == searchSpaceLength) + return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (index + valueTailLength + Vector256.Count > searchSpaceLength) - index = searchSpaceLength - valueTailLength - Vector256.Count; + // Overlap with the current chunk if there is not enough room for the next one + if (index + valueTailLength + Vector256.Count > searchSpaceLength) + index = searchSpaceLength - valueTailLength - Vector256.Count; - goto NEXT_AVX; + } while (true); } - else if (Sse2.IsSupported) + if (Sse2.IsSupported) { // Find the last unique (which is not equal to ch1) character // the algorithm is fine if both are equal, just a little bit less efficient @@ -134,42 +137,44 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), Vector128 ch1 = Vector128.Create((ushort)valueHead); Vector128 ch2 = Vector128.Create(ch2Val); - NEXT_SSE: - uint mask = (uint)Sse2.MoveMask( - 
Sse2.And( - Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, index)), - Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance))).AsByte()); - - while (mask != 0) + do { - int bitPos = BitOperations.TrailingZeroCount(mask); - // div by 2 (shr) because we work with 2-byte chars - int charPos = (int)((uint)bitPos / 2); - if (valueTailByteLength == 2 || // we already matched two chars - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), - ref valueTail, - valueTailByteLength)) + uint mask = (uint)Sse2.MoveMask( + Sse2.And( + Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, index)), + Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance))).AsByte()); + + while (mask != 0) { - return index + charPos; - } + int bitPos = BitOperations.TrailingZeroCount(mask); + // div by 2 (shr) because we work with 2-byte chars + int charPos = (int)((uint)bitPos / 2); + if (valueTailByteLength == 1 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), + ref valueTail, + valueTailByteLength)) + { + return index + charPos; + } - // Clear two lowest set bits - if (Bmi1.IsSupported) - mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); - else - mask &= ~(uint)(0b11 << bitPos); - } - index += Vector128.Count; + // Clear two lowest set bits + if (Bmi1.IsSupported) + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + else + mask &= ~(uint)(0b11 << bitPos); + } - if (index + valueTailLength == searchSpaceLength) - return -1; + index += Vector128.Count; - // Overlap with the current chunk if there is not enough room for the next one - if (index + valueTailLength + Vector128.Count > searchSpaceLength) - index = searchSpaceLength - valueTailLength - Vector128.Count; + if (index + valueTailLength == searchSpaceLength) + return -1; - goto NEXT_SSE; + // Overlap with the current chunk if there is not enough room for the 
next one + if (index + valueTailLength + Vector128.Count > searchSpaceLength) + index = searchSpaceLength - valueTailLength - Vector128.Count; + } + while (true); } Debug.Fail("Unreachable"); @@ -247,38 +252,40 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), Vector256 ch1 = Vector256.Create((ushort)valueHead); Vector256 ch2 = Vector256.Create((ushort)ch2Val); - NEXT_AVX: - uint mask = (uint)Avx2.MoveMask( - Avx2.And( - Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), - Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance)))).AsByte()); - - while (mask != 0) + do { - // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 30 - BitOperations.LeadingZeroCount(mask); - int charPos = (int)((uint)bitPos / 2); - - if (valueTailByteLength == 2 || // we already matched two chars - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), - ref valueTail, - valueTailByteLength)) - { - return charPos + offset; - } + uint mask = (uint)Avx2.MoveMask( + Avx2.And( + Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), + Avx2.CompareEqual(ch2, + LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance)))) + .AsByte()); - mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. 
- } + while (mask != 0) + { + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 30 - BitOperations.LeadingZeroCount(mask); + int charPos = (int)((uint)bitPos / 2); + + if (valueTailByteLength == 1 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), + ref valueTail, + valueTailByteLength)) + { + return charPos + offset; + } - offset -= Vector256.Count; - if (offset == -Vector256.Count) - return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (offset < 0) - offset = 0; + mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. + } - goto NEXT_AVX; + offset -= Vector256.Count; + if (offset == -Vector256.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + } while (true); } else if (Sse2.IsSupported) { @@ -294,38 +301,39 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) Vector128 ch1 = Vector128.Create((ushort)value); Vector128 ch2 = Vector128.Create((ushort)ch2Val); - NEXT_SSE: - uint mask = (uint)Sse2.MoveMask( - Sse2.And( - Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), - Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance)))).AsByte()); - - while (mask != 0) + do { - // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 30 - BitOperations.LeadingZeroCount(mask); - int charPos = (int)((uint)bitPos / 2); - - if (valueTailByteLength == 2 || // we already matched two chars - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), - ref valueTail, - valueTailByteLength)) - { - return charPos + offset; - } + uint mask = (uint) + Sse2.MoveMask( + Sse2.And( + Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), + Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, 
(nuint)(offset + ch1ch2Distance)))).AsByte()); - mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. - } + while (mask != 0) + { + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 30 - BitOperations.LeadingZeroCount(mask); + int charPos = (int)((uint)bitPos / 2); + + if (valueTailByteLength == 1 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), + ref valueTail, + valueTailByteLength)) + { + return charPos + offset; + } - offset -= Vector128.Count; - if (offset == -Vector128.Count) - return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (offset < 0) - offset = 0; + mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. + } - goto NEXT_SSE; + offset -= Vector128.Count; + if (offset == -Vector128.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + } while (true); } Debug.Fail("Unreachable"); From 3554ad3a9c60571b10694f7a2c5c9531b48817b9 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 9 Jan 2022 19:58:45 +0300 Subject: [PATCH 16/39] fix bug in LastIndexOf --- .../src/System/SpanHelpers.Byte.cs | 4 ++-- .../src/System/SpanHelpers.Char.cs | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index bbe14a1bd433dd..f9b64627600c89 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -599,7 +599,7 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (valueTailNLength == 2 || // we 
already matched two bytes + if (valueTailNLength == 1 || // we already matched two bytes SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), ref valueTail, @@ -647,7 +647,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (valueTailNLength == 2 || // we already matched two bytes + if (valueTailNLength == 1 || // we already matched two bytes SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), ref valueTail, diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index eb97b7028c04c8..68731513cef7ec 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -139,7 +139,8 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), do { - uint mask = (uint)Sse2.MoveMask( + uint mask = (uint) + Sse2.MoveMask( Sse2.And( Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, index)), Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance))).AsByte()); @@ -254,12 +255,11 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), do { - uint mask = (uint)Avx2.MoveMask( - Avx2.And( + uint mask = (uint) + Avx2.MoveMask( + Avx2.And( Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), - Avx2.CompareEqual(ch2, - LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance)))) - .AsByte()); + Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance)))).AsByte()); while (mask != 0) { From de87ec22d5ff54e804d86b2f34ae0db7b48c3e04 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 9 Jan 2022 21:45:48 +0300 Subject: [PATCH 17/39] Make sure String.LastIndexOf is optimized --- .../src/System/MemoryExtensions.cs | 25 
++++++++++++++----- .../src/System/SpanHelpers.T.cs | 8 ------ 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs index d3c0039fbef62e..ae6be670a0421e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs @@ -580,12 +580,25 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(span)), [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int LastIndexOf(this ReadOnlySpan span, ReadOnlySpan value) where T : IEquatable { - if (Unsafe.SizeOf() == sizeof(byte) && RuntimeHelpers.IsBitwiseEquatable()) - return SpanHelpers.LastIndexOf( - ref Unsafe.As(ref MemoryMarshal.GetReference(span)), - span.Length, - ref Unsafe.As(ref MemoryMarshal.GetReference(value)), - value.Length); + if (RuntimeHelpers.IsBitwiseEquatable()) + { + if (Unsafe.SizeOf() == sizeof(byte)) + { + return SpanHelpers.LastIndexOf( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + span.Length, + ref Unsafe.As(ref MemoryMarshal.GetReference(value)), + value.Length); + } + if (Unsafe.SizeOf() == sizeof(char)) + { + return SpanHelpers.LastIndexOf( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + span.Length, + ref Unsafe.As(ref MemoryMarshal.GetReference(value)), + value.Length); + } + } return SpanHelpers.LastIndexOf(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(value), value.Length); } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index 61185c64b1104c..a61c4f89fbd2f1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -774,14 +774,6 @@ public static int LastIndexOf(ref T searchSpace, int searchSpaceLength, ref 
T if (valueLength == 0) return searchSpaceLength; // A zero-length sequence is always treated as "found" at the end of the search space. - if (RuntimeHelpers.IsBitwiseEquatable()) - { - if (Unsafe.SizeOf() == 1) - return LastIndexOf(ref Unsafe.As(ref searchSpace), searchSpaceLength, ref Unsafe.As(ref value), valueLength); - if (Unsafe.SizeOf() == 2) - return LastIndexOf(ref Unsafe.As(ref searchSpace), searchSpaceLength, ref Unsafe.As(ref value), valueLength); - } - int valueTailLength = valueLength - 1; if (valueTailLength == 0) { From b0b04ad0ffea22825ccaf83af26f96381f441886 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 14 Jan 2022 02:17:56 +0300 Subject: [PATCH 18/39] Use xplat simd helpers - implicit ARM support --- .../src/System/SpanHelpers.Byte.cs | 36 +++++++----------- .../src/System/SpanHelpers.Char.cs | 37 ++++++++----------- 2 files changed, 29 insertions(+), 44 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index f9b64627600c89..8154d3536cf8c2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -36,7 +36,7 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte nuint valueTailNLength = (nuint)(uint)valueTailLength; // Avx2 implies Sse2 - if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) + if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } @@ -82,11 +82,9 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte do { - uint mask = (uint) - Avx2.MoveMask( - Avx2.And( - Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), - Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + Vector256 cmpCh1 = Vector256.Equals(ch1, 
Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); while (mask != 0) { @@ -129,11 +127,9 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), do { - uint mask = (uint) - Sse2.MoveMask( - Sse2.And( - Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), - Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); while (mask != 0) { @@ -545,7 +541,7 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b nuint valueTailNLength = (nuint)(uint)valueTailLength; // Avx2 implies Sse2 - if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) + if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } @@ -589,11 +585,9 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b do { - uint mask = (uint) - Avx2.MoveMask( - Avx2.And( - Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), - Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); while (mask != 0) { @@ -637,11 +631,9 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), do { - uint mask = (uint) - 
Sse2.MoveMask( - Sse2.And( - Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), - Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))))); + Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); while (mask != 0) { diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 68731513cef7ec..590e50b6837fb2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -36,7 +36,7 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char nuint valueTailByteLength = (nuint)(uint)valueTailLength * 2; // Avx2 implies Sse2 - if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) + if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_CHARS; } @@ -87,11 +87,9 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), do { - uint mask = (uint) - Avx2.MoveMask( - Avx2.And( - Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, index)), - Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance))).AsByte()); + Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, index)); + Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance)); + uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); while (mask != 0) { @@ -139,11 +137,9 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), do { - uint mask = (uint) - Sse2.MoveMask( - Sse2.And( - Sse2.CompareEqual(ch1, LoadVector128(ref 
searchSpace, index)), - Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance))).AsByte()); + Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, index)); + Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance)); + uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); while (mask != 0) { @@ -205,7 +201,7 @@ public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref c nuint valueTailByteLength = (nuint)(uint)valueTailLength * 2; // Avx2 implies Sse2 - if (Sse2.IsSupported && searchSpaceLength - valueTailLength >= Vector128.Count) + if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_CHARS; } @@ -255,11 +251,10 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), do { - uint mask = (uint) - Avx2.MoveMask( - Avx2.And( - Avx2.CompareEqual(ch1, LoadVector256(ref searchSpace, (nuint)offset)), - Avx2.CompareEqual(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance)))).AsByte()); + + Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, (nuint)offset)); + Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); while (mask != 0) { @@ -303,11 +298,9 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) do { - uint mask = (uint) - Sse2.MoveMask( - Sse2.And( - Sse2.CompareEqual(ch1, LoadVector128(ref searchSpace, (nuint)offset)), - Sse2.CompareEqual(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance)))).AsByte()); + Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, (nuint)offset)); + Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); 
while (mask != 0) { From 141e236d78f4cadef1abd67fe0e59a1f27671bb2 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 15 Jan 2022 01:40:59 +0300 Subject: [PATCH 19/39] fix arm --- "\\" | 6 ++++++ .../System.Private.CoreLib/src/System/SpanHelpers.Byte.cs | 4 ++-- .../System.Private.CoreLib/src/System/SpanHelpers.Char.cs | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 "\\" diff --git "a/\\" "b/\\" new file mode 100644 index 00000000000000..4369a13dc3457d --- /dev/null +++ "b/\\" @@ -0,0 +1,6 @@ +Merge branch 'main' of github.com:dotnet/runtime into arm64-zeroext-after-ldr +# Please enter a commit message to explain why this merge is necessary, +# especially if it merges an updated upstream into a topic branch. +# +# Lines starting with '#' will be ignored, and an empty message aborts +# the commit. diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 8154d3536cf8c2..1a232c47518a5e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -113,7 +113,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } while (true); } - else if (Sse2.IsSupported) + else if (Sse2.IsSupported || AdvSimd.IsSupported) { // Find the last unique (which is not equal to ch1) byte // the algorithm is fine if both are equal, just a little bit less efficient @@ -615,7 +615,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } while (true); } - else if (Sse2.IsSupported) + else if (Sse2.IsSupported || AdvSimd.IsSupported) { offset = searchSpaceLength - valueTailLength - Vector128.Count; diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 590e50b6837fb2..7413410ce3c91c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -123,7 +123,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), } while (true); } - if (Sse2.IsSupported) + if (Sse2.IsSupported || AdvSimd.IsSupported) { // Find the last unique (which is not equal to ch1) character // the algorithm is fine if both are equal, just a little bit less efficient @@ -282,7 +282,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) offset = 0; } while (true); } - else if (Sse2.IsSupported) + else if (Sse2.IsSupported || AdvSimd.IsSupported) { offset = searchSpaceLength - valueTailLength - Vector128.Count; From e664ad33502b1bca626eb47c283af4cc5abf8558 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sat, 15 Jan 2022 14:56:13 +0300 Subject: [PATCH 20/39] Delete \ --- "\\" | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 "\\" diff --git "a/\\" "b/\\" deleted file mode 100644 index 4369a13dc3457d..00000000000000 --- "a/\\" +++ /dev/null @@ -1,6 +0,0 @@ -Merge branch 'main' of github.com:dotnet/runtime into arm64-zeroext-after-ldr -# Please enter a commit message to explain why this merge is necessary, -# especially if it merges an updated upstream into a topic branch. -# -# Lines starting with '#' will be ignored, and an empty message aborts -# the commit. 
From bff84196b3a7ea9401d9a8a999d199fa67b1c63d Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 15 Jan 2022 15:01:43 +0300 Subject: [PATCH 21/39] Use Vector128.IsHardwareAccelerated --- .../src/System/SpanHelpers.Byte.cs | 10 ++++------ .../src/System/SpanHelpers.Char.cs | 10 ++++------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 1a232c47518a5e..8ed6930bafb5b8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -35,8 +35,7 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte int offset = 0; nuint valueTailNLength = (nuint)(uint)valueTailLength; - // Avx2 implies Sse2 - if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) + if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } @@ -113,7 +112,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } while (true); } - else if (Sse2.IsSupported || AdvSimd.IsSupported) + else { // Find the last unique (which is not equal to ch1) byte // the algorithm is fine if both are equal, just a little bit less efficient @@ -540,8 +539,7 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b int offset = 0; nuint valueTailNLength = (nuint)(uint)valueTailLength; - // Avx2 implies Sse2 - if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) + if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } @@ -615,7 +613,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } while (true); } - else if (Sse2.IsSupported || AdvSimd.IsSupported) + else { offset = searchSpaceLength - 
valueTailLength - Vector128.Count; diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 7413410ce3c91c..81abfca123cbb2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -35,8 +35,7 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); nuint valueTailByteLength = (nuint)(uint)valueTailLength * 2; - // Avx2 implies Sse2 - if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) + if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_CHARS; } @@ -123,7 +122,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), } while (true); } - if (Sse2.IsSupported || AdvSimd.IsSupported) + else { // Find the last unique (which is not equal to ch1) character // the algorithm is fine if both are equal, just a little bit less efficient @@ -200,8 +199,7 @@ public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref c ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); nuint valueTailByteLength = (nuint)(uint)valueTailLength * 2; - // Avx2 implies Sse2 - if ((Sse2.IsSupported || AdvSimd.IsSupported) && searchSpaceLength - valueTailLength >= Vector128.Count) + if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_CHARS; } @@ -282,7 +280,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) offset = 0; } while (true); } - else if (Sse2.IsSupported || AdvSimd.IsSupported) + else { offset = searchSpaceLength - valueTailLength - Vector128.Count; From 3def5e07349a07934137b4b929de1996d45a2452 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 15 Jan 2022 
15:16:49 +0300 Subject: [PATCH 22/39] Fix build --- .../src/System/SpanHelpers.Byte.cs | 9 +++++---- .../src/System/SpanHelpers.Char.cs | 11 ++++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 8ed6930bafb5b8..079d6dccb0d786 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -112,7 +112,8 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } while (true); } - else + + if (Vector128.IsHardwareAccelerated) { // Find the last unique (which is not equal to ch1) byte // the algorithm is fine if both are equal, just a little bit less efficient @@ -564,8 +565,8 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b } return -1; - // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła - // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 SEARCH_TWO_BYTES: if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) { @@ -613,7 +614,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } while (true); } - else + if (Vector128.IsHardwareAccelerated) { offset = searchSpaceLength - valueTailLength - Vector128.Count; diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 81abfca123cbb2..13bea10bcaab02 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -122,7 +122,8 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), } while (true); } - else + + if (Vector128.IsHardwareAccelerated) { // Find the last unique (which is not equal to ch1) character // the algorithm is fine if both are equal, just a little bit less efficient @@ -229,10 +230,9 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), } return -1; - // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła - // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 SEARCH_TWO_CHARS: - if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) { offset = searchSpaceLength - valueTailLength - Vector256.Count; @@ -280,7 +280,8 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) offset = 0; } while (true); } - else + + if (Vector128.IsHardwareAccelerated) { offset = searchSpaceLength - valueTailLength - Vector128.Count; From a52138bfbf29ae468e69ae92a632630fc15f3f74 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 15 Jan 2022 19:02:40 +0300 Subject: [PATCH 23/39] Use IsAllZero --- .../src/System/SpanHelpers.Byte.cs | 147 +++++++++++------ .../src/System/SpanHelpers.Char.cs | 148 ++++++++++-------- 2 files changed, 183 insertions(+), 112 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 079d6dccb0d786..592c2457ad96a0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -83,22 +83,26 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte { Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); - uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - while (mask != 0) + if (!cmpAnd.IsAllZero()) { - int bitPos = BitOperations.TrailingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes - SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + uint mask = cmpAnd.ExtractMostSignificantBits(); + while (mask != 0) { - return offset + bitPos; - } + int bitPos = BitOperations.TrailingZeroCount(mask); + if (valueTailNLength == 1 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { + return offset + bitPos; + } - // Clear lowest set bit - mask = Bmi1.IsSupported ? Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); + // Clear lowest set bit + mask = Bmi1.IsSupported ? 
Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); + } } offset += Vector256.Count; @@ -129,22 +133,30 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), { Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); - uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - while (mask != 0) + // On Platforms with SSE41 and ARM64 use fast "is all zero" check + // it's especially important for ARM64 where ExtractMostSignificantBits is expensive + // but it also shows nice numbers on XArch + bool useFastAllZeroCheck = Sse41.IsSupported || AdvSimd.Arm64.IsSupported; + if (!useFastAllZeroCheck || !cmpAnd.IsAllZero()) { - int bitPos = BitOperations.TrailingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes - SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + uint mask = cmpAnd.ExtractMostSignificantBits(); + while (mask != 0) { - return offset + bitPos; - } + int bitPos = BitOperations.TrailingZeroCount(mask); + if (valueTailNLength == 1 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { + return offset + bitPos; + } - // Clear lowest set bit - mask = Bmi1.IsSupported ? Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); + // Clear lowest set bit + mask = Bmi1.IsSupported ? 
Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); + } } offset += Vector128.Count; @@ -586,23 +598,27 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b { Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); - uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - while (mask != 0) + if (!cmpAnd.IsAllZero()) { - // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes - SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + uint mask = cmpAnd.ExtractMostSignificantBits(); + while (mask != 0) { - return bitPos + offset; - } + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 31 - BitOperations.LeadingZeroCount(mask); + if (valueTailNLength == 1 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { + return bitPos + offset; + } - // Clear the highest set bit. - mask &= ~(uint)(1 << bitPos); + // Clear the highest set bit. 
+ mask &= ~(uint)(1 << bitPos); + } } offset -= Vector256.Count; @@ -632,23 +648,31 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), { Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); - uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - while (mask != 0) + // On Platforms with SSE41 and ARM64 use fast "is all zero" check + // it's especially important for ARM64 where ExtractMostSignificantBits is expensive + // but it also shows nice numbers on XArch + bool useFastAllZeroCheck = Sse41.IsSupported || AdvSimd.Arm64.IsSupported; + if (!useFastAllZeroCheck || !cmpAnd.IsAllZero()) { - // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes - SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + uint mask = cmpAnd.ExtractMostSignificantBits(); + while (mask != 0) { - return bitPos + offset; - } + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 31 - BitOperations.LeadingZeroCount(mask); + if (valueTailNLength == 1 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { + return bitPos + offset; + } - // Clear the highest set bit. - mask &= ~(uint)(1 << bitPos); + // Clear the highest set bit. 
+ mask &= ~(uint)(1 << bitPos); + } } offset -= Vector128.Count; @@ -2234,5 +2258,28 @@ private static bool TryFindFirstMatchedLane(Vector128 mask, Vector128> 2; return true; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool IsAllZero(this Vector128 vector) + { + // TODO: JIT should emit it for `vector == Vector128.Zero, see + // https://github.com/dotnet/runtime/issues/63829 + if (AdvSimd.Arm64.IsSupported) + { + return AdvSimd.Arm64.MaxAcross(vector).ToScalar() == 0; + } + + // Currently, this helper is only used when SSE41 is available on xarch + // so no need for SSE2 MoveMask here + Debug.Assert(Sse41.IsSupported); + return Sse41.TestZ(vector, vector); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool IsAllZero(this Vector256 vector) + { + Debug.Assert(Avx.IsSupported); + return Avx.TestZ(vector, vector); + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 13bea10bcaab02..dfb68cc97406b2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -88,27 +88,31 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), { Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, index)); Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance)); - uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - while (mask != 0) + if (!cmpAnd.IsAllZero()) { - int bitPos = BitOperations.TrailingZeroCount(mask); - // div by 2 (shr) because we work with 2-byte chars - int charPos = (int)((uint)bitPos / 2); - if (valueTailByteLength == 1 || // we already matched two chars - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), - ref valueTail, - 
valueTailByteLength)) + uint mask = cmpAnd.ExtractMostSignificantBits(); + while (mask != 0) { - return index + charPos; - } + int bitPos = BitOperations.TrailingZeroCount(mask); + // div by 2 (shr) because we work with 2-byte chars + int charPos = (int)((uint)bitPos / 2); + if (valueTailByteLength == 1 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), + ref valueTail, + valueTailByteLength)) + { + return index + charPos; + } - // Clear two lowest set bits - if (Bmi1.IsSupported) - mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); - else - mask &= ~(uint)(0b11 << bitPos); + // Clear two lowest set bits + if (Bmi1.IsSupported) + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + else + mask &= ~(uint)(0b11 << bitPos); + } } index += Vector256.Count; @@ -139,27 +143,35 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), { Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, index)); Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance)); - uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - while (mask != 0) + // On Platforms with SSE41 and ARM64 use fast "is all zero" check + // it's especially important for ARM64 where ExtractMostSignificantBits is expensive + // but it also shows nice numbers on XArch + bool useFastAllZeroCheck = Sse41.IsSupported || AdvSimd.Arm64.IsSupported; + if (!useFastAllZeroCheck || !cmpAnd.IsAllZero()) { - int bitPos = BitOperations.TrailingZeroCount(mask); - // div by 2 (shr) because we work with 2-byte chars - int charPos = (int)((uint)bitPos / 2); - if (valueTailByteLength == 1 || // we already matched two chars - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), - ref valueTail, - valueTailByteLength)) + uint mask = cmpAnd.ExtractMostSignificantBits(); + while 
(mask != 0) { - return index + charPos; - } + int bitPos = BitOperations.TrailingZeroCount(mask); + // div by 2 (shr) because we work with 2-byte chars + int charPos = (int)((uint)bitPos / 2); + if (valueTailByteLength == 1 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), + ref valueTail, + valueTailByteLength)) + { + return index + charPos; + } - // Clear two lowest set bits - if (Bmi1.IsSupported) - mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); - else - mask &= ~(uint)(0b11 << bitPos); + // Clear two lowest set bits + if (Bmi1.IsSupported) + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + else + mask &= ~(uint)(0b11 << bitPos); + } } index += Vector128.Count; @@ -252,24 +264,28 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, (nuint)offset)); Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))); - uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - while (mask != 0) + if (!cmpAnd.IsAllZero()) { - // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 30 - BitOperations.LeadingZeroCount(mask); - int charPos = (int)((uint)bitPos / 2); - - if (valueTailByteLength == 1 || // we already matched two chars - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), - ref valueTail, - valueTailByteLength)) + uint mask = cmpAnd.ExtractMostSignificantBits(); + while (mask != 0) { - return charPos + offset; - } + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 30 - BitOperations.LeadingZeroCount(mask); + int charPos = (int)((uint)bitPos / 2); + + if (valueTailByteLength == 1 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref 
Unsafe.Add(ref searchSpace, offset + charPos + 1)), + ref valueTail, + valueTailByteLength)) + { + return charPos + offset; + } - mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. + mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. + } } offset -= Vector256.Count; @@ -299,24 +315,32 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) { Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, (nuint)offset)); Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))); - uint mask = (cmpCh1 & cmpCh2).AsByte().ExtractMostSignificantBits(); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - while (mask != 0) + // On Platforms with SSE41 and ARM64 use fast "is all zero" check + // it's especially important for ARM64 where ExtractMostSignificantBits is expensive + // but it also shows nice numbers on XArch + bool useFastAllZeroCheck = Sse41.IsSupported || AdvSimd.Arm64.IsSupported; + if (!useFastAllZeroCheck || !cmpAnd.IsAllZero()) { - // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 30 - BitOperations.LeadingZeroCount(mask); - int charPos = (int)((uint)bitPos / 2); - - if (valueTailByteLength == 1 || // we already matched two chars - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), - ref valueTail, - valueTailByteLength)) + uint mask = cmpAnd.ExtractMostSignificantBits(); + while (mask != 0) { - return charPos + offset; - } + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 30 - BitOperations.LeadingZeroCount(mask); + int charPos = (int)((uint)bitPos / 2); + + if (valueTailByteLength == 1 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), + ref valueTail, + valueTailByteLength)) + { + return charPos + offset; + } - mask &= ~(uint)(0b11 << bitPos); // clear two 
highest set bits. + mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. + } } offset -= Vector128.Count; From f86e32337bcdb0f3038a6f7bb7af0e07923a4ea9 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 15 Jan 2022 20:53:48 +0300 Subject: [PATCH 24/39] Address feedback --- .../src/System/SpanHelpers.Byte.cs | 53 +++++-------------- .../src/System/SpanHelpers.Char.cs | 28 +++++----- 2 files changed, 26 insertions(+), 55 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 592c2457ad96a0..120a00309e6693 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -85,7 +85,8 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - if (!cmpAnd.IsAllZero()) + // Early out: cmpAnd is all zeros + if (cmpAnd != Vector256.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); while (mask != 0) @@ -100,8 +101,8 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), return offset + bitPos; } - // Clear lowest set bit - mask = Bmi1.IsSupported ? 
Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); + // Clear the lowest set bit (BLSR on xarch) + mask &= mask - 1; } } @@ -135,11 +136,9 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - // On Platforms with SSE41 and ARM64 use fast "is all zero" check - // it's especially important for ARM64 where ExtractMostSignificantBits is expensive - // but it also shows nice numbers on XArch - bool useFastAllZeroCheck = Sse41.IsSupported || AdvSimd.Arm64.IsSupported; - if (!useFastAllZeroCheck || !cmpAnd.IsAllZero()) + // Early out: cmpAnd is all zeros + // it's especially important for ARM where ExtractMostSignificantBits is not cheap + if (cmpAnd != Vector128.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); while (mask != 0) @@ -154,8 +153,8 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), return offset + bitPos; } - // Clear lowest set bit - mask = Bmi1.IsSupported ? 
Bmi1.ResetLowestSetBit(mask) : mask & (mask - 1); + // Clear the lowest set bit (BLSR on xarch) + mask &= mask - 1; } } offset += Vector128.Count; @@ -600,7 +599,8 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - if (!cmpAnd.IsAllZero()) + // Early out: cmpAnd is all zeros + if (cmpAnd != Vector256.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); while (mask != 0) @@ -650,11 +650,9 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - // On Platforms with SSE41 and ARM64 use fast "is all zero" check - // it's especially important for ARM64 where ExtractMostSignificantBits is expensive - // but it also shows nice numbers on XArch - bool useFastAllZeroCheck = Sse41.IsSupported || AdvSimd.Arm64.IsSupported; - if (!useFastAllZeroCheck || !cmpAnd.IsAllZero()) + // Early out: cmpAnd is all zeros + // it's especially important for ARM where ExtractMostSignificantBits is not cheap + if (cmpAnd != Vector128.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); while (mask != 0) @@ -2258,28 +2256,5 @@ private static bool TryFindFirstMatchedLane(Vector128 mask, Vector128> 2; return true; } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool IsAllZero(this Vector128 vector) - { - // TODO: JIT should emit it for `vector == Vector128.Zero, see - // https://github.com/dotnet/runtime/issues/63829 - if (AdvSimd.Arm64.IsSupported) - { - return AdvSimd.Arm64.MaxAcross(vector).ToScalar() == 0; - } - - // Currently, this helper is only used when SSE41 is available on xarch - // so no need for SSE2 MoveMask here - Debug.Assert(Sse41.IsSupported); - return Sse41.TestZ(vector, vector); - } - - 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool IsAllZero(this Vector256 vector) - { - Debug.Assert(Avx.IsSupported); - return Avx.TestZ(vector, vector); - } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index dfb68cc97406b2..b2b02bf0ccc926 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -90,7 +90,8 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance)); Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - if (!cmpAnd.IsAllZero()) + // Early out: cmpAnd is all zeros + if (cmpAnd != Vector256.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); while (mask != 0) @@ -107,7 +108,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), return index + charPos; } - // Clear two lowest set bits + // Clear two the lowest set bits if (Bmi1.IsSupported) mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); else @@ -145,11 +146,9 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance)); Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - // On Platforms with SSE41 and ARM64 use fast "is all zero" check - // it's especially important for ARM64 where ExtractMostSignificantBits is expensive - // but it also shows nice numbers on XArch - bool useFastAllZeroCheck = Sse41.IsSupported || AdvSimd.Arm64.IsSupported; - if (!useFastAllZeroCheck || !cmpAnd.IsAllZero()) + // Early out: cmpAnd is all zeros + // it's especially important for ARM where ExtractMostSignificantBits is not cheap + if (cmpAnd != Vector128.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); while (mask != 0) @@ -266,7 +265,8 @@ ref 
Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))); Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - if (!cmpAnd.IsAllZero()) + // Early out: cmpAnd is all zeros + if (cmpAnd != Vector256.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); while (mask != 0) @@ -283,7 +283,6 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) { return charPos + offset; } - mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. } } @@ -317,11 +316,9 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))); Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - // On Platforms with SSE41 and ARM64 use fast "is all zero" check - // it's especially important for ARM64 where ExtractMostSignificantBits is expensive - // but it also shows nice numbers on XArch - bool useFastAllZeroCheck = Sse41.IsSupported || AdvSimd.Arm64.IsSupported; - if (!useFastAllZeroCheck || !cmpAnd.IsAllZero()) + // Early out: cmpAnd is all zeros + // it's especially important for ARM where ExtractMostSignificantBits is not cheap + if (cmpAnd != Vector128.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); while (mask != 0) @@ -338,8 +335,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) { return charPos + offset; } - - mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. + mask &= ~(uint)(0b11 << bitPos); // clear two the highest set bits. 
} } From f2372a03ff5a497b0dcc4f8b3369458305526f12 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 15 Jan 2022 21:08:51 +0300 Subject: [PATCH 25/39] Address feedback --- .../src/System/Numerics/BitOperations.cs | 20 +++++++++++++++++++ .../src/System/SpanHelpers.Byte.cs | 16 ++++++--------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs index 6a7141cd01cd48..419a1c9f5f1146 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs @@ -708,5 +708,25 @@ public static nuint RotateRight(nuint value, int offset) return (nuint)RotateRight((uint)value, offset); #endif } + + /// + /// Reset the lowest significant bit in the given value + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static uint ResetLowestSetBit(uint value) + { + // It's lowered to BLSR on x86 + return value & (value - 1); + } + + /// + /// Reset specific bit in the given value + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static uint ResetBit(uint value, int bitPos) + { + // TODO: Recognize BTR on x86 and LSL+BIC on ARM + return value & ~(uint)(1 << bitPos); + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 120a00309e6693..5c863e6b7f3865 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -100,9 +100,8 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), { return offset + bitPos; } - - // Clear the lowest set bit (BLSR on xarch) - mask &= mask - 1; + // Clear the lowest set bit + mask = BitOperations.ResetLowestSetBit(mask); } } @@ -152,9 +151,8 @@ ref Unsafe.Add(ref 
searchSpace, offset + bitPos + 1), { return offset + bitPos; } - - // Clear the lowest set bit (BLSR on xarch) - mask &= mask - 1; + // Clear the lowest set bit + mask = BitOperations.ResetLowestSetBit(mask); } } offset += Vector128.Count; @@ -615,9 +613,8 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), { return bitPos + offset; } - // Clear the highest set bit. - mask &= ~(uint)(1 << bitPos); + mask = BitOperations.ResetBit(mask, bitPos); } } @@ -667,9 +664,8 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), { return bitPos + offset; } - // Clear the highest set bit. - mask &= ~(uint)(1 << bitPos); + mask = BitOperations.ResetBit(mask, bitPos); } } From 38ef9a955edc0a5c344d0adb5e57db26f7a6993e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 15 Jan 2022 23:11:33 +0300 Subject: [PATCH 26/39] micro-optimization, do-while is better here since mask is guaranteed to be non-zero --- .../src/System/SpanHelpers.Byte.cs | 16 ++++++++-------- .../src/System/SpanHelpers.Char.cs | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 5c863e6b7f3865..ed0477853611ff 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -89,7 +89,7 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte if (cmpAnd != Vector256.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); - while (mask != 0) + do { int bitPos = BitOperations.TrailingZeroCount(mask); if (valueTailNLength == 1 || // we already matched two bytes @@ -102,7 +102,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } // Clear the lowest set bit mask = BitOperations.ResetLowestSetBit(mask); - } + } while (mask != 0); } offset += Vector256.Count; @@ -140,7 +140,7 @@ ref Unsafe.Add(ref searchSpace, offset 
+ bitPos + 1), if (cmpAnd != Vector128.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); - while (mask != 0) + do { int bitPos = BitOperations.TrailingZeroCount(mask); if (valueTailNLength == 1 || // we already matched two bytes @@ -153,7 +153,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } // Clear the lowest set bit mask = BitOperations.ResetLowestSetBit(mask); - } + } while (mask != 0); } offset += Vector128.Count; @@ -601,7 +601,7 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b if (cmpAnd != Vector256.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); - while (mask != 0) + do { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 31 - BitOperations.LeadingZeroCount(mask); @@ -615,7 +615,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } // Clear the highest set bit. mask = BitOperations.ResetBit(mask, bitPos); - } + } while (mask != 0); } offset -= Vector256.Count; @@ -652,7 +652,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), if (cmpAnd != Vector128.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); - while (mask != 0) + do { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 31 - BitOperations.LeadingZeroCount(mask); @@ -666,7 +666,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } // Clear the highest set bit. 
mask = BitOperations.ResetBit(mask, bitPos); - } + } while (mask != 0); } offset -= Vector128.Count; diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index b2b02bf0ccc926..b1cf53d0bb26e3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -94,7 +94,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), if (cmpAnd != Vector256.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); - while (mask != 0) + do { int bitPos = BitOperations.TrailingZeroCount(mask); // div by 2 (shr) because we work with 2-byte chars @@ -113,7 +113,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); else mask &= ~(uint)(0b11 << bitPos); - } + } while (mask != 0); } index += Vector256.Count; @@ -151,7 +151,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), if (cmpAnd != Vector128.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); - while (mask != 0) + do { int bitPos = BitOperations.TrailingZeroCount(mask); // div by 2 (shr) because we work with 2-byte chars @@ -170,7 +170,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); else mask &= ~(uint)(0b11 << bitPos); - } + } while (mask != 0); } index += Vector128.Count; @@ -269,7 +269,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), if (cmpAnd != Vector256.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); - while (mask != 0) + do { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 30 - BitOperations.LeadingZeroCount(mask); @@ -284,7 +284,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) return charPos + offset; } mask &= ~(uint)(0b11 << bitPos); 
// clear two highest set bits. - } + } while (mask != 0); } offset -= Vector256.Count; @@ -321,7 +321,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) if (cmpAnd != Vector128.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); - while (mask != 0) + do { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 30 - BitOperations.LeadingZeroCount(mask); @@ -336,7 +336,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) return charPos + offset; } mask &= ~(uint)(0b11 << bitPos); // clear two the highest set bits. - } + } while (mask != 0); } offset -= Vector128.Count; From 4827ddc4b2dfbf6bbc8159de78f2f666b10301b0 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 18 Jan 2022 14:37:23 +0300 Subject: [PATCH 27/39] Address feedabc --- .../src/System/SpanHelpers.Byte.cs | 331 +++++++++--------- .../src/System/SpanHelpers.Char.cs | 30 +- 2 files changed, 165 insertions(+), 196 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 0b5635219e13fc..2b35f22d93cef7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -23,17 +23,13 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte return 0; // A zero-length sequence is always treated as "found" at the start of the search space. 
int valueTailLength = valueLength - 1; - if (valueTailLength == 0) - { - // for single-byte values use plain IndexOf - return IndexOf(ref searchSpace, value, searchSpaceLength); - } + return IndexOf(ref searchSpace, value, searchSpaceLength); // for single-byte values use plain IndexOf byte valueHead = value; ref byte valueTail = ref Unsafe.Add(ref value, 1); int offset = 0; - nuint valueTailNLength = (nuint)(uint)valueTailLength; + nuint valueTailNLength = (uint)valueTailLength; if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) { @@ -56,7 +52,7 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. // Found the first element of "value". See if the tail matches. - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + 1), ref valueTail, valueTailNLength)) // The (nuint)-cast is necessary to pick the correct overload + if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + 1), ref valueTail, valueTailNLength)) return offset; // The tail matched. Return a successful find. 
remainingSearchSpaceLength--; @@ -107,17 +103,16 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), offset += Vector256.Count; - if (offset + valueTailLength == searchSpaceLength) + if (offset == searchSpaceLength - valueTailLength) return -1; // Overlap with the current chunk if there is not enough room for the next one - if (offset + valueTailLength + Vector256.Count > searchSpaceLength) + if (offset > searchSpaceLength - valueTailLength - Vector256.Count) offset = searchSpaceLength - valueTailLength - Vector256.Count; } while (true); } - - if (Vector128.IsHardwareAccelerated) + else // 128bit vector path (SSE2 or AdvSimd) { // Find the last unique (which is not equal to ch1) byte // the algorithm is fine if both are equal, just a little bit less efficient @@ -157,18 +152,163 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } offset += Vector128.Count; - if (offset + valueTailLength == searchSpaceLength) + if (offset == searchSpaceLength - valueTailLength) return -1; // Overlap with the current chunk if there is not enough room for the next one - if (offset + valueTailLength + Vector128.Count > searchSpaceLength) + if (offset > searchSpaceLength - valueTailLength - Vector128.Count) offset = searchSpaceLength - valueTailLength - Vector128.Count; } while (true); } + } - Debug.Fail("Unreachable"); + public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength) + { + Debug.Assert(searchSpaceLength >= 0); + Debug.Assert(valueLength >= 0); + + if (valueLength == 0) + return searchSpaceLength; // A zero-length sequence is always treated as "found" at the end of the search space. 
+ + int valueTailLength = valueLength - 1; + if (valueTailLength == 0) + return LastIndexOf(ref searchSpace, value, searchSpaceLength); // for single-byte values use plain LastIndexOf + + byte valueHead = value; + ref byte valueTail = ref Unsafe.Add(ref value, 1); + int offset = 0; + nuint valueTailNLength = (uint)valueTailLength; + + if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) + { + goto SEARCH_TWO_BYTES; + } + + while (true) + { + Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". + int remainingSearchSpaceLength = searchSpaceLength - offset - valueTailLength; + if (remainingSearchSpaceLength <= 0) + break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. + + // Do a quick search for the first element of "value". + int relativeIndex = LastIndexOf(ref searchSpace, valueHead, remainingSearchSpaceLength); + if (relativeIndex < 0) + break; + + // Found the first element of "value". See if the tail matches. + if (SequenceEqual(ref Unsafe.Add(ref searchSpace, relativeIndex + 1), ref valueTail, valueTailNLength)) + return relativeIndex; // The tail matched. Return a successful find. 
+ + offset += remainingSearchSpaceLength - relativeIndex; + } return -1; + + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + SEARCH_TWO_BYTES: + if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) + { + offset = searchSpaceLength - valueTailLength - Vector256.Count; + + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector256 ch1 = Vector256.Create(value); + Vector256 ch2 = Vector256.Create(ch2Val); + + do + { + Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + + // Early out: cmpAnd is all zeros + if (cmpAnd != Vector256.Zero) + { + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 31 - BitOperations.LeadingZeroCount(mask); + if (valueTailNLength == 1 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { + return bitPos + offset; + } + // Clear the highest set bit. 
+ mask = BitOperations.ResetBit(mask, bitPos); + } while (mask != 0); + } + + offset -= Vector256.Count; + if (offset == -Vector256.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + + } while (true); + } + else // 128bit vector path (SSE2 or AdvSimd) + { + offset = searchSpaceLength - valueTailLength - Vector128.Count; + + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create(value); + Vector128 ch2 = Vector128.Create(ch2Val); + + do + { + Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + + // Early out: cmpAnd is all zeros + // it's especially important for ARM where ExtractMostSignificantBits is not cheap + if (cmpAnd != Vector128.Zero) + { + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 31 - BitOperations.LeadingZeroCount(mask); + if (valueTailNLength == 1 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), + ref valueTail, + valueTailNLength)) + { + return bitPos + offset; + } + // Clear the highest set bit. 
+ mask = BitOperations.ResetBit(mask, bitPos); + } while (mask != 0); + } + + offset -= Vector128.Count; + if (offset == -Vector128.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + + } while (true); + } } // Adapted from IndexOf(...) @@ -528,161 +668,6 @@ public static unsafe int IndexOf(ref byte searchSpace, byte value, int length) return (int)(offset + 7); } - public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength) - { - Debug.Assert(searchSpaceLength >= 0); - Debug.Assert(valueLength >= 0); - - if (valueLength == 0) - return searchSpaceLength; // A zero-length sequence is always treated as "found" at the end of the search space. - - int valueTailLength = valueLength - 1; - - if (valueTailLength == 0) - { - // for single-byte values use plain LastIndexOf - return LastIndexOf(ref searchSpace, value, searchSpaceLength); - } - - byte valueHead = value; - ref byte valueTail = ref Unsafe.Add(ref value, 1); - int offset = 0; - nuint valueTailNLength = (nuint)(uint)valueTailLength; - - if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) - { - goto SEARCH_TWO_BYTES; - } - - while (true) - { - Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". - int remainingSearchSpaceLength = searchSpaceLength - offset - valueTailLength; - if (remainingSearchSpaceLength <= 0) - break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. - - // Do a quick search for the first element of "value". - int relativeIndex = LastIndexOf(ref searchSpace, valueHead, remainingSearchSpaceLength); - if (relativeIndex < 0) - break; - - // Found the first element of "value". See if the tail matches. 
- if (SequenceEqual(ref Unsafe.Add(ref searchSpace, relativeIndex + 1), ref valueTail, valueTailNLength)) // The (nunit)-cast is necessary to pick the correct overload - return relativeIndex; // The tail matched. Return a successful find. - - offset += remainingSearchSpaceLength - relativeIndex; - } - return -1; - - // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła - // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 - SEARCH_TWO_BYTES: - if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) - { - offset = searchSpaceLength - valueTailLength - Vector256.Count; - - // Find the last unique (which is not equal to ch1) byte - // the algorithm is fine if both are equal, just a little bit less efficient - byte ch2Val = Unsafe.Add(ref value, valueTailLength); - int ch1ch2Distance = valueTailLength; - while (ch2Val == value && ch1ch2Distance > 1) - ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); - - Vector256 ch1 = Vector256.Create(value); - Vector256 ch2 = Vector256.Create(ch2Val); - - do - { - Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); - Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); - Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - - // Early out: cmpAnd is all zeros - if (cmpAnd != Vector256.Zero) - { - uint mask = cmpAnd.ExtractMostSignificantBits(); - do - { - // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes - SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) - { - return bitPos + offset; - } - // Clear the highest set bit. 
- mask = BitOperations.ResetBit(mask, bitPos); - } while (mask != 0); - } - - offset -= Vector256.Count; - if (offset == -Vector256.Count) - return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (offset < 0) - offset = 0; - - } while (true); - } - if (Vector128.IsHardwareAccelerated) - { - offset = searchSpaceLength - valueTailLength - Vector128.Count; - - // Find the last unique (which is not equal to ch1) byte - // the algorithm is fine if both are equal, just a little bit less efficient - byte ch2Val = Unsafe.Add(ref value, valueTailLength); - int ch1ch2Distance = valueTailLength; - while (ch2Val == value && ch1ch2Distance > 1) - ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); - - Vector128 ch1 = Vector128.Create(value); - Vector128 ch2 = Vector128.Create(ch2Val); - - do - { - Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); - Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); - Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - - // Early out: cmpAnd is all zeros - // it's especially important for ARM where ExtractMostSignificantBits is not cheap - if (cmpAnd != Vector128.Zero) - { - uint mask = cmpAnd.ExtractMostSignificantBits(); - do - { - // unlike IndexOf, here we use LZCNT to process matches starting from the end - int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes - SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) - { - return bitPos + offset; - } - // Clear the highest set bit. 
- mask = BitOperations.ResetBit(mask, bitPos); - } while (mask != 0); - } - - offset -= Vector128.Count; - if (offset == -Vector128.Count) - return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (offset < 0) - offset = 0; - - } while (true); - } - - Debug.Fail("Unreachable"); - return -1; - } - [MethodImpl(MethodImplOptions.AggressiveOptimization)] public static int LastIndexOf(ref byte searchSpace, byte value, int length) { @@ -1805,13 +1790,11 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l // This becomes a conditional jmp foward to not favor it. goto NotEqual; } - // Use Vector128.Size as Vector128.Count doesn't inline at R2R time - // https://github.com/dotnet/runtime/issues/32714 - else if (length >= Vector128.Size) + else if (length >= (nuint)Vector128.Count) { Vector128 vecResult; nuint offset = 0; - nuint lengthToExamine = length - Vector128.Size; + nuint lengthToExamine = length - (nuint)Vector128.Count; // Unsigned, so it shouldn't have overflowed larger than length (rather than negative) Debug.Assert(lengthToExamine < length); if (lengthToExamine != 0) @@ -1825,7 +1808,7 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l { goto NotEqual; } - offset += Vector128.Size; + offset += (nuint)Vector128.Count; } while (lengthToExamine > offset); } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 5876eaa2721404..9972326f4d614d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -24,7 +24,6 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char int index = 0; int valueTailLength = valueLength - 1; - if (valueTailLength == 0) { // for single-char values use plain IndexOf @@ -33,7 +32,7 @@ public static int 
IndexOf(ref char searchSpace, int searchSpaceLength, ref char char valueHead = value; ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); - nuint valueTailByteLength = (nuint)(uint)valueTailLength * 2; + nuint valueTailByteLength = ((uint)valueTailLength * 2); if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) { @@ -127,8 +126,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), } while (true); } - - if (Vector128.IsHardwareAccelerated) + else // 128bit vector path (SSE2 or AdvSimd) { // Find the last unique (which is not equal to ch1) character // the algorithm is fine if both are equal, just a little bit less efficient @@ -175,18 +173,15 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), index += Vector128.Count; - if (index + valueTailLength == searchSpaceLength) + if (index == searchSpaceLength - valueTailLength) return -1; // Overlap with the current chunk if there is not enough room for the next one - if (index + valueTailLength + Vector128.Count > searchSpaceLength) + if (index > searchSpaceLength - valueTailLength - Vector128.Count) index = searchSpaceLength - valueTailLength - Vector128.Count; } while (true); } - - Debug.Fail("Unreachable"); - return -1; } public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref char value, int valueLength) @@ -198,18 +193,13 @@ public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref c return searchSpaceLength; // A zero-length sequence is always treated as "found" at the end of the search space. 
int valueTailLength = valueLength - 1; - if (valueTailLength == 0) - { - // for single-char values use plain LastIndexOf - return LastIndexOf(ref searchSpace, value, searchSpaceLength); - } + return LastIndexOf(ref searchSpace, value, searchSpaceLength); // for single-char values use plain LastIndexOf int offset = 0; - char valueHead = value; ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); - nuint valueTailByteLength = (nuint)(uint)valueTailLength * 2; + nuint valueTailByteLength = ((uint)valueTailLength * 2); if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) { @@ -232,7 +222,7 @@ public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref c if (SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), ref valueTail, - valueTailByteLength)) // The (nunit)-cast is necessary to pick the correct overload + valueTailByteLength)) { return relativeIndex; // The tail matched. Return a successful find. 
} @@ -295,8 +285,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) offset = 0; } while (true); } - - if (Vector128.IsHardwareAccelerated) + else // 128bit vector path (SSE2 or AdvSimd) { offset = searchSpaceLength - valueTailLength - Vector128.Count; @@ -347,9 +336,6 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) offset = 0; } while (true); } - - Debug.Fail("Unreachable"); - return -1; } [MethodImpl(MethodImplOptions.AggressiveOptimization)] From d6013518dbebd635ba8b642384247d31a51b7e08 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 18 Jan 2022 15:57:35 +0300 Subject: [PATCH 28/39] Use clever trick I borrowed from IndexOfAny for trailing elements --- .../src/System/SpanHelpers.Byte.cs | 84 ++++++++++++------ .../src/System/SpanHelpers.Char.cs | 88 +++++++++++++++---- 2 files changed, 127 insertions(+), 45 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 2b35f22d93cef7..5fea1cbb7843ea 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -63,7 +63,7 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 SEARCH_TWO_BYTES: - if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) + if (Avx2.IsSupported && searchSpaceLength - valueTailLength - Vector256.Count >= 0) { // Find the last unique (which is not equal to ch1) byte // the algorithm is fine if both are equal, just a little bit less efficient @@ -75,6 +75,9 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte Vector256 ch1 = 
Vector256.Create(value); Vector256 ch2 = Vector256.Create(ch2Val); + // Subtract Vector256.Count in order to make a fast loop where we will never + // cross boundaries, we'll handle the last chunk separately + int lengthToExamine = searchSpaceLength - valueTailLength - Vector256.Count; do { Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); @@ -91,26 +94,37 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte if (valueTailNLength == 1 || // we already matched two bytes SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + ref valueTail, valueTailNLength)) { return offset + bitPos; } - // Clear the lowest set bit - mask = BitOperations.ResetLowestSetBit(mask); + mask = BitOperations.ResetLowestSetBit(mask); // Clear the lowest set bit } while (mask != 0); } - offset += Vector256.Count; + } while (offset < lengthToExamine); - if (offset == searchSpaceLength - valueTailLength) + // Handle the last Vector256.Count chunk we previously subtracted + // We might overlap with the previously processed data + { + Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)lengthToExamine)); + Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(lengthToExamine + ch1ch2Distance))); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + if (cmpAnd == Vector256.Zero) return -1; - - // Overlap with the current chunk if there is not enough room for the next one - if (offset > searchSpaceLength - valueTailLength - Vector256.Count) - offset = searchSpaceLength - valueTailLength - Vector256.Count; - - } while (true); + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + int bitPos = BitOperations.TrailingZeroCount(mask); + if (valueTailNLength == 1 || // we already matched two bytes + SequenceEqual(ref Unsafe.Add(ref searchSpace, lengthToExamine + bitPos + 1), ref valueTail, valueTailNLength)) 
+ { + return lengthToExamine + bitPos; + } + mask = BitOperations.ResetLowestSetBit(mask); // Clear the lowest set bit + } while (mask != 0); + return -1; + } } else // 128bit vector path (SSE2 or AdvSimd) { @@ -124,6 +138,9 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), Vector128 ch1 = Vector128.Create(value); Vector128 ch2 = Vector128.Create(ch2Val); + // Subtract Vector128.Count in order to make a fast loop where we will never + // cross boundaries, we'll handle the last chunk separately + int lengthToExamine = searchSpaceLength - valueTailLength - Vector128.Count; do { Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); @@ -141,8 +158,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), if (valueTailNLength == 1 || // we already matched two bytes SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + ref valueTail, valueTailNLength)) { return offset + bitPos; } @@ -151,15 +167,29 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), } while (mask != 0); } offset += Vector128.Count; + } while (offset < lengthToExamine); - if (offset == searchSpaceLength - valueTailLength) + // Handle the last Vector128.Count chunk we previously subtracted + // We might overlap with the previously processed data + { + Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)lengthToExamine)); + Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(lengthToExamine + ch1ch2Distance))); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + if (cmpAnd == Vector128.Zero) return -1; - - // Overlap with the current chunk if there is not enough room for the next one - if (offset > searchSpaceLength - valueTailLength - Vector128.Count) - offset = searchSpaceLength - valueTailLength - Vector128.Count; - - } while (true); + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + int bitPos = 
BitOperations.TrailingZeroCount(mask); + if (valueTailNLength == 1 || // we already matched two bytes + SequenceEqual(ref Unsafe.Add(ref searchSpace, lengthToExamine + bitPos + 1), ref valueTail, valueTailNLength)) + { + return lengthToExamine + bitPos; + } + mask = BitOperations.ResetLowestSetBit(mask); // Clear the lowest set bit + } while (mask != 0); + return -1; + } } } @@ -221,7 +251,6 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b Vector256 ch1 = Vector256.Create(value); Vector256 ch2 = Vector256.Create(ch2Val); - do { Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); @@ -255,7 +284,6 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), // Overlap with the current chunk if there is not enough room for the next one if (offset < 0) offset = 0; - } while (true); } else // 128bit vector path (SSE2 or AdvSimd) @@ -1731,8 +1759,8 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l else { nuint offset = length - sizeof(uint); - uint differentBits = LoadUInt(ref first) - LoadUInt(ref second); - differentBits |= LoadUInt(ref first, offset) - LoadUInt(ref second, offset); + uint differentBits = unchecked(LoadUInt(ref first) - LoadUInt(ref second)); + differentBits |= unchecked(LoadUInt(ref first, offset) - LoadUInt(ref second, offset)); result = (differentBits == 0); goto Result; } @@ -1866,8 +1894,8 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l Debug.Assert(length <= (nuint)sizeof(nuint) * 2); nuint offset = length - (nuint)sizeof(nuint); - nuint differentBits = LoadNUInt(ref first) - LoadNUInt(ref second); - differentBits |= LoadNUInt(ref first, offset) - LoadNUInt(ref second, offset); + nuint differentBits = unchecked(LoadNUInt(ref first) - LoadNUInt(ref second)); + differentBits |= unchecked(LoadNUInt(ref first, offset) - LoadNUInt(ref second, offset)); result = (differentBits == 0); goto Result; } diff --git 
a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 9972326f4d614d..6ae368e032ccb7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -71,7 +71,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 SEARCH_TWO_CHARS: - if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) + if (Avx2.IsSupported && searchSpaceLength - valueTailLength - Vector256.Count >= 0) { // Find the last unique (which is not equal to ch1) character // the algorithm is fine if both are equal, just a little bit less efficient @@ -83,6 +83,9 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), Vector256 ch1 = Vector256.Create((ushort)valueHead); Vector256 ch2 = Vector256.Create(ch2Val); + // Subtract Vector256.Count in order to make a fast loop where we will never + // cross boundaries, we'll handle the last chunk separately + int lengthToExamine = searchSpaceLength - valueTailLength - Vector256.Count; do { Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, index)); @@ -101,8 +104,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), if (valueTailByteLength == 1 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), - ref valueTail, - valueTailByteLength)) + ref valueTail, valueTailByteLength)) { return index + charPos; } @@ -114,17 +116,41 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), mask &= ~(uint)(0b11 << bitPos); } while (mask != 0); } - index += Vector256.Count; + } while (index < lengthToExamine); + + // 
Handle the last Vector256.Count chunk we previously subtracted + // We might overlap with the previously processed data + { + Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, index)); + Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance)); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - if (index + valueTailLength == searchSpaceLength) + // Early out: cmpAnd is all zeros + if (cmpAnd == Vector256.Zero) return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (index + valueTailLength + Vector256.Count > searchSpaceLength) - index = searchSpaceLength - valueTailLength - Vector256.Count; + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + int bitPos = BitOperations.TrailingZeroCount(mask); + // div by 2 (shr) because we work with 2-byte chars + int charPos = (int)((uint)bitPos / 2); + if (valueTailByteLength == 1 || // we already matched two chars + SequenceEqual(ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), ref valueTail, valueTailByteLength)) + { + return index + charPos; + } - } while (true); + // Clear two the lowest set bits + if (Bmi1.IsSupported) + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + else + mask &= ~(uint)(0b11 << bitPos); + } while (mask != 0); + + return -1; + } } else // 128bit vector path (SSE2 or AdvSimd) { @@ -138,6 +164,9 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), Vector128 ch1 = Vector128.Create((ushort)valueHead); Vector128 ch2 = Vector128.Create(ch2Val); + // Subtract Vector128.Count in order to make a fast loop where we will never + // cross boundaries, we'll handle the last chunk separately + int lengthToExamine = searchSpaceLength - valueTailLength - Vector128.Count; do { Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, index)); @@ -157,8 +186,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), if 
(valueTailByteLength == 1 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), - ref valueTail, - valueTailByteLength)) + ref valueTail, valueTailByteLength)) { return index + charPos; } @@ -170,17 +198,43 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), mask &= ~(uint)(0b11 << bitPos); } while (mask != 0); } - index += Vector128.Count; + } while (index < lengthToExamine); + + // Handle the last Vector128.Count chunk we previously subtracted + // We might overlap with the previously processed data + { + Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, index)); + Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance)); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - if (index == searchSpaceLength - valueTailLength) + // Early out: cmpAnd is all zeros + if (cmpAnd == Vector128.Zero) return -1; - // Overlap with the current chunk if there is not enough room for the next one - if (index > searchSpaceLength - valueTailLength - Vector128.Count) - index = searchSpaceLength - valueTailLength - Vector128.Count; + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + int bitPos = BitOperations.TrailingZeroCount(mask); + // div by 2 (shr) because we work with 2-byte chars + int charPos = (int)((uint)bitPos / 2); + if (valueTailByteLength == 1 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), + ref valueTail, valueTailByteLength)) + { + return index + charPos; + } + + // Clear two the lowest set bits + if (Bmi1.IsSupported) + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + else + mask &= ~(uint)(0b11 << bitPos); + } while (mask != 0); + + return -1; } - while (true); } } From 3a005bc9bf6671b207016919851b7829b6b3a946 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 18 Jan 2022 17:37:29 +0300 Subject: [PATCH 29/39] give up on +1 bump 
for SequenceCompare --- .../src/System/SpanHelpers.Byte.cs | 48 ++++++++++--------- .../src/System/SpanHelpers.Char.cs | 47 +++++++++--------- 2 files changed, 49 insertions(+), 46 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 5fea1cbb7843ea..2e89edd1c12511 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -27,15 +27,14 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte return IndexOf(ref searchSpace, value, searchSpaceLength); // for single-byte values use plain IndexOf byte valueHead = value; - ref byte valueTail = ref Unsafe.Add(ref value, 1); int offset = 0; - nuint valueTailNLength = (uint)valueTailLength; if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } + ref byte valueTail = ref Unsafe.Add(ref value, 1); int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; while (remainingSearchSpaceLength > 0) @@ -52,7 +51,7 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. // Found the first element of "value". See if the tail matches. - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + 1), ref valueTail, valueTailNLength)) + if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + 1), ref valueTail, (nuint)(uint)valueTailLength)) return offset; // The tail matched. Return a successful find. 
remainingSearchSpaceLength--; @@ -91,10 +90,10 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte do { int bitPos = BitOperations.TrailingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes + if (valueLength == 2 || // we already matched two bytes SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, valueTailNLength)) + ref Unsafe.Add(ref searchSpace, offset + bitPos), + ref value, (nuint)(uint)valueLength)) { return offset + bitPos; } @@ -116,8 +115,10 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), do { int bitPos = BitOperations.TrailingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes - SequenceEqual(ref Unsafe.Add(ref searchSpace, lengthToExamine + bitPos + 1), ref valueTail, valueTailNLength)) + if (valueLength == 2 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos), + ref value, (nuint)(uint)valueLength)) { return lengthToExamine + bitPos; } @@ -155,10 +156,10 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), do { int bitPos = BitOperations.TrailingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes + if (valueLength == 2 || // we already matched two bytes SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, valueTailNLength)) + ref Unsafe.Add(ref searchSpace, offset + bitPos), + ref value, (nuint)(uint)valueLength)) { return offset + bitPos; } @@ -181,8 +182,10 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), do { int bitPos = BitOperations.TrailingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes - SequenceEqual(ref Unsafe.Add(ref searchSpace, lengthToExamine + bitPos + 1), ref valueTail, valueTailNLength)) + if (valueLength == 2 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos), + ref value, 
(nuint)(uint)valueLength)) { return lengthToExamine + bitPos; } @@ -206,15 +209,16 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b return LastIndexOf(ref searchSpace, value, searchSpaceLength); // for single-byte values use plain LastIndexOf byte valueHead = value; - ref byte valueTail = ref Unsafe.Add(ref value, 1); int offset = 0; - nuint valueTailNLength = (uint)valueTailLength; if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } + ref byte valueTail = ref Unsafe.Add(ref value, 1); + nuint valueTailNLength = (uint)valueTailLength; + while (true) { Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". @@ -265,11 +269,10 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes + if (valueLength == 2 || // we already matched two bytes SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + ref Unsafe.Add(ref searchSpace, offset + bitPos), + ref value, (nuint)(uint)valueLength)) { return bitPos + offset; } @@ -315,11 +318,10 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), { // unlike IndexOf, here we use LZCNT to process matches starting from the end int bitPos = 31 - BitOperations.LeadingZeroCount(mask); - if (valueTailNLength == 1 || // we already matched two bytes + if (valueLength == 2 || // we already matched two bytes SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos + 1), - ref valueTail, - valueTailNLength)) + ref Unsafe.Add(ref searchSpace, offset + bitPos), + ref value, (nuint)(uint)valueLength)) { return bitPos + offset; } diff --git 
a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 6ae368e032ccb7..a54e2d3302b4a6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -31,14 +31,14 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char } char valueHead = value; - ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); - nuint valueTailByteLength = ((uint)valueTailLength * 2); if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_CHARS; } + ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); + nuint valueTailByteLength = ((uint)valueTailLength * 2); int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; while (remainingSearchSpaceLength > 0) @@ -101,10 +101,10 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), int bitPos = BitOperations.TrailingZeroCount(mask); // div by 2 (shr) because we work with 2-byte chars int charPos = (int)((uint)bitPos / 2); - if (valueTailByteLength == 1 || // we already matched two chars + if (valueLength == 2 || // we already matched two chars SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), - ref valueTail, valueTailByteLength)) + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return index + charPos; } @@ -136,8 +136,10 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), int bitPos = BitOperations.TrailingZeroCount(mask); // div by 2 (shr) because we work with 2-byte chars int charPos = (int)((uint)bitPos / 2); - if (valueTailByteLength == 1 || // we already matched two chars - SequenceEqual(ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), ref valueTail, valueTailByteLength)) + if 
(valueLength == 2 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return index + charPos; } @@ -183,10 +185,10 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), int bitPos = BitOperations.TrailingZeroCount(mask); // div by 2 (shr) because we work with 2-byte chars int charPos = (int)((uint)bitPos / 2); - if (valueTailByteLength == 1 || // we already matched two chars + if (valueLength == 2 || // we already matched two chars SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), - ref valueTail, valueTailByteLength)) + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return index + charPos; } @@ -218,10 +220,10 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), int bitPos = BitOperations.TrailingZeroCount(mask); // div by 2 (shr) because we work with 2-byte chars int charPos = (int)((uint)bitPos / 2); - if (valueTailByteLength == 1 || // we already matched two chars + if (valueLength == 2 || // we already matched two chars SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos + 1)), - ref valueTail, valueTailByteLength)) + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return index + charPos; } @@ -252,14 +254,15 @@ public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref c int offset = 0; char valueHead = value; - ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); - nuint valueTailByteLength = ((uint)valueTailLength * 2); if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) { goto SEARCH_TWO_CHARS; } + ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); + nuint valueTailByteLength = 
((uint)valueTailLength * 2); + while (true) { Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". @@ -319,11 +322,10 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), int bitPos = 30 - BitOperations.LeadingZeroCount(mask); int charPos = (int)((uint)bitPos / 2); - if (valueTailByteLength == 1 || // we already matched two chars + if (valueLength == 2 || // we already matched two chars SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), - ref valueTail, - valueTailByteLength)) + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return charPos + offset; } @@ -370,11 +372,10 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)) int bitPos = 30 - BitOperations.LeadingZeroCount(mask); int charPos = (int)((uint)bitPos / 2); - if (valueTailByteLength == 1 || // we already matched two chars + if (valueLength == 2 || // we already matched two chars SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos + 1)), - ref valueTail, - valueTailByteLength)) + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return charPos + offset; } From 9fefe81da3aa097566fb20c0972a72f823785f34 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 18 Jan 2022 17:44:43 +0300 Subject: [PATCH 30/39] Clean up --- .../src/System/SpanHelpers.Byte.cs | 21 +++++++++++-------- .../src/System/SpanHelpers.Char.cs | 16 +++++++------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 2e89edd1c12511..1f09b37fa13faf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -51,7 +51,9 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. // Found the first element of "value". See if the tail matches. - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + 1), ref valueTail, (nuint)(uint)valueTailLength)) + if (SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + 1), + ref valueTail, (nuint)(uint)valueTailLength)) // The (nuint)-cast is necessary to pick the correct overload return offset; // The tail matched. Return a successful find. remainingSearchSpaceLength--; @@ -93,7 +95,7 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte if (valueLength == 2 || // we already matched two bytes SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos), - ref value, (nuint)(uint)valueLength)) + ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload { return offset + bitPos; } @@ -118,7 +120,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), if (valueLength == 2 || // we already matched two bytes SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos), - ref value, (nuint)(uint)valueLength)) + ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload { return lengthToExamine + bitPos; } @@ -159,7 +161,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), if (valueLength == 2 || // we already matched two bytes SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos), - ref value, (nuint)(uint)valueLength)) + ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload { return offset + bitPos; } @@ -185,7 +187,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), if (valueLength == 2 || // we already matched two bytes SequenceEqual( ref Unsafe.Add(ref 
searchSpace, offset + bitPos), - ref value, (nuint)(uint)valueLength)) + ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload { return lengthToExamine + bitPos; } @@ -217,7 +219,6 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b } ref byte valueTail = ref Unsafe.Add(ref value, 1); - nuint valueTailNLength = (uint)valueTailLength; while (true) { @@ -232,7 +233,9 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b break; // Found the first element of "value". See if the tail matches. - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, relativeIndex + 1), ref valueTail, valueTailNLength)) + if (SequenceEqual( + ref Unsafe.Add(ref searchSpace, relativeIndex + 1), + ref valueTail, (nuint)(uint)valueTailLength)) // The (nuint)-cast is necessary to pick the correct overload return relativeIndex; // The tail matched. Return a successful find. offset += remainingSearchSpaceLength - relativeIndex; @@ -272,7 +275,7 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b if (valueLength == 2 || // we already matched two bytes SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos), - ref value, (nuint)(uint)valueLength)) + ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload { return bitPos + offset; } @@ -321,7 +324,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), if (valueLength == 2 || // we already matched two bytes SequenceEqual( ref Unsafe.Add(ref searchSpace, offset + bitPos), - ref value, (nuint)(uint)valueLength)) + ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload { return bitPos + offset; } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index a54e2d3302b4a6..4da073a488d050 100644 --- 
a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -104,7 +104,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload { return index + charPos; } @@ -139,7 +139,7 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload { return index + charPos; } @@ -188,7 +188,7 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload { return index + charPos; } @@ -223,7 +223,7 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload { return index + charPos; } @@ -261,7 +261,6 @@ public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref c } ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); 
- nuint valueTailByteLength = ((uint)valueTailLength * 2); while (true) { @@ -278,8 +277,7 @@ public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref c // Found the first element of "value". See if the tail matches. if (SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), - ref valueTail, - valueTailByteLength)) + ref valueTail, (nuint)(uint)valueTailLength * 2)) // The (nuint)-cast is necessary to pick the correct overload { return relativeIndex; // The tail matched. Return a successful find. } @@ -325,7 +323,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload { return charPos + offset; } @@ -375,7 +373,7 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload { return charPos + offset; } From c118dd20ee06ee75b4e50d0e4e9a9060625beb00 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 18 Jan 2022 17:51:44 +0300 Subject: [PATCH 31/39] Clean up --- .../src/System/SpanHelpers.Byte.cs | 8 ++++---- .../src/System/SpanHelpers.Char.cs | 17 ++++++++--------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 1f09b37fa13faf..e26c79fcda6759 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -1764,8 +1764,8 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l else { nuint offset = length - sizeof(uint); - uint differentBits = unchecked(LoadUInt(ref first) - LoadUInt(ref second)); - differentBits |= unchecked(LoadUInt(ref first, offset) - LoadUInt(ref second, offset)); + uint differentBits = LoadUInt(ref first) - LoadUInt(ref second); + differentBits |= LoadUInt(ref first, offset) - LoadUInt(ref second, offset); result = (differentBits == 0); goto Result; } @@ -1899,8 +1899,8 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l Debug.Assert(length <= (nuint)sizeof(nuint) * 2); nuint offset = length - (nuint)sizeof(nuint); - nuint differentBits = unchecked(LoadNUInt(ref first) - LoadNUInt(ref second)); - differentBits |= unchecked(LoadNUInt(ref first, offset) - LoadNUInt(ref second, offset)); + nuint differentBits = LoadNUInt(ref first) - LoadNUInt(ref second); + differentBits |= LoadNUInt(ref first, offset) - LoadNUInt(ref second, offset)); result = (differentBits == 0); goto Result; } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 4da073a488d050..a6bc7ca22f2f84 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -38,7 +38,6 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char } ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); - nuint valueTailByteLength = ((uint)valueTailLength * 2); int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; while (remainingSearchSpaceLength > 0) @@ -58,7 +57,7 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char if (SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 
1)), ref valueTail, - valueTailByteLength)) + (nuint)(uint)valueTailLength * 2)) { return index; // The tail matched. Return a successful find. } @@ -104,7 +103,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return index + charPos; } @@ -139,7 +138,7 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nui if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return index + charPos; } @@ -188,7 +187,7 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nui if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return index + charPos; } @@ -223,7 +222,7 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nui if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return index + charPos; } @@ -277,7 +276,7 @@ public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref c // Found the first element of 
"value". See if the tail matches. if (SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), - ref valueTail, (nuint)(uint)valueTailLength * 2)) // The (nuint)-cast is necessary to pick the correct overload + ref valueTail, (nuint)(uint)valueTailLength * 2)) { return relativeIndex; // The tail matched. Return a successful find. } @@ -323,7 +322,7 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return charPos + offset; } @@ -373,7 +372,7 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nui if (valueLength == 2 || // we already matched two chars SequenceEqual( ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // The (nuint)-cast is necessary to pick the correct overload + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { return charPos + offset; } From 7e8b100685bf412c26e3df04a9949f273c1d2048 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 18 Jan 2022 17:54:35 +0300 Subject: [PATCH 32/39] fix build --- .../System.Private.CoreLib/src/System/SpanHelpers.Byte.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index e26c79fcda6759..8feb9af89fc455 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -1900,7 +1900,7 @@ public static unsafe bool SequenceEqual(ref byte first, ref byte second, nuint l nuint offset = length - (nuint)sizeof(nuint); nuint 
differentBits = LoadNUInt(ref first) - LoadNUInt(ref second); - differentBits |= LoadNUInt(ref first, offset) - LoadNUInt(ref second, offset)); + differentBits |= LoadNUInt(ref first, offset) - LoadNUInt(ref second, offset); result = (differentBits == 0); goto Result; } From e9df89129bd61befd96c41d042490562893b05e1 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 22 Jan 2022 03:07:26 +0300 Subject: [PATCH 33/39] Add debug asserts --- .../src/System/SpanHelpers.Byte.cs | 34 +++++++++++++++---- .../src/System/SpanHelpers.Char.cs | 23 +++++++++++++ 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 8feb9af89fc455..d2d7dbf26a3791 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -79,8 +79,12 @@ ref Unsafe.Add(ref searchSpace, offset + 1), // Subtract Vector256.Count in order to make a fast loop where we will never // cross boundaries, we'll handle the last chunk separately int lengthToExamine = searchSpaceLength - valueTailLength - Vector256.Count; + Debug.Assert(lengthToExamine >= 0); do { + // Make sure we don't go out of bounds + Debug.Assert(offset + ch1ch2Distance + Vector256.Count <= searchSpaceLength); + Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); @@ -108,8 +112,15 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), // Handle the last Vector256.Count chunk we previously subtracted // We might overlap with the previously processed data { - Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)lengthToExamine)); - Vector256 cmpCh2 = Vector256.Equals(ch2, 
Vector256.LoadUnsafe(ref searchSpace, (nuint)(lengthToExamine + ch1ch2Distance))); + if (offset == searchSpaceLength - valueTailLength) + return -1; + offset = searchSpaceLength - valueTailLength - Vector256.Count; + + // Make sure we don't go out of bounds + Debug.Assert(offset + ch1ch2Distance + Vector256.Count <= searchSpaceLength); + + Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); if (cmpAnd == Vector256.Zero) return -1; @@ -122,7 +133,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload { - return lengthToExamine + bitPos; + return offset + bitPos; } mask = BitOperations.ResetLowestSetBit(mask); // Clear the lowest set bit } while (mask != 0); @@ -144,8 +155,12 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), // Subtract Vector128.Count in order to make a fast loop where we will never // cross boundaries, we'll handle the last chunk separately int lengthToExamine = searchSpaceLength - valueTailLength - Vector128.Count; + Debug.Assert(lengthToExamine >= 0); do { + // Make sure we don't go out of bounds + Debug.Assert(offset + ch1ch2Distance + Vector128.Count <= searchSpaceLength); + Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); @@ -175,8 +190,15 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), // Handle the last Vector128.Count chunk we previously subtracted // We might overlap with the previously processed data { - Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref 
searchSpace, (nuint)lengthToExamine)); - Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(lengthToExamine + ch1ch2Distance))); + if (offset == searchSpaceLength - valueTailLength) + return -1; + offset = searchSpaceLength - valueTailLength - Vector128.Count; + + // Make sure we don't go out of bounds + Debug.Assert(offset + ch1ch2Distance + Vector128.Count <= searchSpaceLength); + + Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); if (cmpAnd == Vector128.Zero) return -1; @@ -189,7 +211,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), ref Unsafe.Add(ref searchSpace, offset + bitPos), ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload { - return lengthToExamine + bitPos; + return offset + bitPos; } mask = BitOperations.ResetLowestSetBit(mask); // Clear the lowest set bit } while (mask != 0); diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index a6bc7ca22f2f84..510dfec96033e4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -85,8 +85,13 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), // Subtract Vector256.Count in order to make a fast loop where we will never // cross boundaries, we'll handle the last chunk separately int lengthToExamine = searchSpaceLength - valueTailLength - Vector256.Count; + Debug.Assert(lengthToExamine >= 0); + do { + // Make sure we don't go out of bounds + Debug.Assert(index + ch1ch2Distance + Vector256.Count <= searchSpaceLength); + Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, index)); Vector256 
cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance)); Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); @@ -121,6 +126,13 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // Handle the last Vector256.Count chunk we previously subtracted // We might overlap with the previously processed data { + if (index == searchSpaceLength - valueTailLength) + return -1; + index = searchSpaceLength - valueTailLength - Vector256.Count; + + // Make sure we don't go out of bounds + Debug.Assert(index + ch1ch2Distance + Vector256.Count <= searchSpaceLength); + Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, index)); Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance)); Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); @@ -168,8 +180,12 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // Subtract Vector128.Count in order to make a fast loop where we will never // cross boundaries, we'll handle the last chunk separately int lengthToExamine = searchSpaceLength - valueTailLength - Vector128.Count; + Debug.Assert(lengthToExamine >= 0); do { + // Make sure we don't go out of bounds + Debug.Assert(index + ch1ch2Distance + Vector128.Count <= searchSpaceLength); + Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, index)); Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance)); Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); @@ -205,6 +221,13 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) // Handle the last Vector128.Count chunk we previously subtracted // We might overlap with the previously processed data { + if (index == searchSpaceLength - valueTailLength) + return -1; + index = searchSpaceLength - valueTailLength - Vector128.Count; + + // Make sure we don't go out of bounds + Debug.Assert(index + ch1ch2Distance + Vector128.Count <= searchSpaceLength); + Vector128 cmpCh1 = Vector128.Equals(ch1, 
LoadVector128(ref searchSpace, index)); Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance)); Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); From cb605352bcae6ebd83e5f173e32e56a3d5b149e4 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 22 Jan 2022 12:30:03 +0300 Subject: [PATCH 34/39] Clean up: give up on the unrolled trick - too little value from code bloat --- .../src/System/SpanHelpers.Byte.cs | 101 +++--------- .../src/System/SpanHelpers.Char.cs | 154 +++++------------- 2 files changed, 60 insertions(+), 195 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index d2d7dbf26a3791..3a3bc586912ae8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -26,16 +26,16 @@ public static int IndexOf(ref byte searchSpace, int searchSpaceLength, ref byte if (valueTailLength == 0) return IndexOf(ref searchSpace, value, searchSpaceLength); // for single-byte values use plain IndexOf - byte valueHead = value; int offset = 0; - - if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) + byte valueHead = value; + int searchSpaceMinusValueTailLength = searchSpaceLength - valueTailLength; + if (Vector128.IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } ref byte valueTail = ref Unsafe.Add(ref value, 1); - int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; + int remainingSearchSpaceLength = searchSpaceMinusValueTailLength; while (remainingSearchSpaceLength > 0) { @@ -64,7 +64,7 @@ ref Unsafe.Add(ref searchSpace, offset + 1), // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła // Some details about the implementation can also be found in 
https://github.com/dotnet/runtime/pull/63285 SEARCH_TWO_BYTES: - if (Avx2.IsSupported && searchSpaceLength - valueTailLength - Vector256.Count >= 0) + if (Avx2.IsSupported && searchSpaceMinusValueTailLength - Vector256.Count >= 0) { // Find the last unique (which is not equal to ch1) byte // the algorithm is fine if both are equal, just a little bit less efficient @@ -76,12 +76,9 @@ ref Unsafe.Add(ref searchSpace, offset + 1), Vector256 ch1 = Vector256.Create(value); Vector256 ch2 = Vector256.Create(ch2Val); - // Subtract Vector256.Count in order to make a fast loop where we will never - // cross boundaries, we'll handle the last chunk separately - int lengthToExamine = searchSpaceLength - valueTailLength - Vector256.Count; - Debug.Assert(lengthToExamine >= 0); do { + Debug.Assert(offset >= 0); // Make sure we don't go out of bounds Debug.Assert(offset + ch1ch2Distance + Vector256.Count <= searchSpaceLength); @@ -107,38 +104,14 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), } while (mask != 0); } offset += Vector256.Count; - } while (offset < lengthToExamine); - // Handle the last Vector256.Count chunk we previously subtracted - // We might overlap with the previously processed data - { - if (offset == searchSpaceLength - valueTailLength) + if (offset == searchSpaceMinusValueTailLength) return -1; - offset = searchSpaceLength - valueTailLength - Vector256.Count; - - // Make sure we don't go out of bounds - Debug.Assert(offset + ch1ch2Distance + Vector256.Count <= searchSpaceLength); - Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); - Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); - Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - if (cmpAnd == Vector256.Zero) - return -1; - uint mask = cmpAnd.ExtractMostSignificantBits(); - do - { - int bitPos = BitOperations.TrailingZeroCount(mask); - if (valueLength == 2 || // we already matched two bytes - 
SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos), - ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload - { - return offset + bitPos; - } - mask = BitOperations.ResetLowestSetBit(mask); // Clear the lowest set bit - } while (mask != 0); - return -1; - } + // Overlap with the current chunk for trailing elements + if (offset > searchSpaceMinusValueTailLength - Vector256.Count) + offset = searchSpaceMinusValueTailLength - Vector256.Count; + } while (true); } else // 128bit vector path (SSE2 or AdvSimd) { @@ -152,12 +125,9 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), Vector128 ch1 = Vector128.Create(value); Vector128 ch2 = Vector128.Create(ch2Val); - // Subtract Vector128.Count in order to make a fast loop where we will never - // cross boundaries, we'll handle the last chunk separately - int lengthToExamine = searchSpaceLength - valueTailLength - Vector128.Count; - Debug.Assert(lengthToExamine >= 0); do { + Debug.Assert(offset >= 0); // Make sure we don't go out of bounds Debug.Assert(offset + ch1ch2Distance + Vector128.Count <= searchSpaceLength); @@ -166,7 +136,6 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); // Early out: cmpAnd is all zeros - // it's especially important for ARM where ExtractMostSignificantBits is not cheap if (cmpAnd != Vector128.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); @@ -185,38 +154,14 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), } while (mask != 0); } offset += Vector128.Count; - } while (offset < lengthToExamine); - // Handle the last Vector128.Count chunk we previously subtracted - // We might overlap with the previously processed data - { - if (offset == searchSpaceLength - valueTailLength) + if (offset == searchSpaceMinusValueTailLength) return -1; - offset = searchSpaceLength - valueTailLength - Vector128.Count; - // Make sure we don't go out of bounds - Debug.Assert(offset + ch1ch2Distance 
+ Vector128.Count <= searchSpaceLength); - - Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); - Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); - Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - if (cmpAnd == Vector128.Zero) - return -1; - uint mask = cmpAnd.ExtractMostSignificantBits(); - do - { - int bitPos = BitOperations.TrailingZeroCount(mask); - if (valueLength == 2 || // we already matched two bytes - SequenceEqual( - ref Unsafe.Add(ref searchSpace, offset + bitPos), - ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload - { - return offset + bitPos; - } - mask = BitOperations.ResetLowestSetBit(mask); // Clear the lowest set bit - } while (mask != 0); - return -1; - } + // Overlap with the current chunk for trailing elements + if (offset > searchSpaceMinusValueTailLength - Vector128.Count) + offset = searchSpaceMinusValueTailLength - Vector128.Count; + } while (true); } } @@ -232,10 +177,10 @@ public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref b if (valueTailLength == 0) return LastIndexOf(ref searchSpace, value, searchSpaceLength); // for single-byte values use plain LastIndexOf - byte valueHead = value; int offset = 0; - - if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) + byte valueHead = value; + int searchSpaceMinusValueTailLength = searchSpaceLength - valueTailLength; + if (Vector128.IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector128.Count) { goto SEARCH_TWO_BYTES; } @@ -267,9 +212,9 @@ ref Unsafe.Add(ref searchSpace, relativeIndex + 1), // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 SEARCH_TWO_BYTES: - if 
(Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) + if (Avx2.IsSupported && searchSpaceMinusValueTailLength >= Vector256.Count) { - offset = searchSpaceLength - valueTailLength - Vector256.Count; + offset = searchSpaceMinusValueTailLength - Vector256.Count; // Find the last unique (which is not equal to ch1) byte // the algorithm is fine if both are equal, just a little bit less efficient @@ -316,7 +261,7 @@ ref Unsafe.Add(ref searchSpace, offset + bitPos), } else // 128bit vector path (SSE2 or AdvSimd) { - offset = searchSpaceLength - valueTailLength - Vector128.Count; + offset = searchSpaceMinusValueTailLength - Vector128.Count; // Find the last unique (which is not equal to ch1) byte // the algorithm is fine if both are equal, just a little bit less efficient diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 510dfec96033e4..fb00b062538f2f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -22,7 +22,6 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char if (valueLength == 0) return 0; // A zero-length sequence is always treated as "found" at the start of the search space. 
- int index = 0; int valueTailLength = valueLength - 1; if (valueTailLength == 0) { @@ -30,47 +29,48 @@ public static int IndexOf(ref char searchSpace, int searchSpaceLength, ref char return IndexOf(ref searchSpace, value, searchSpaceLength); } + int offset = 0; char valueHead = value; - - if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) + int searchSpaceMinusValueTailLength = searchSpaceLength - valueTailLength; + if (Vector128.IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector128.Count) { goto SEARCH_TWO_CHARS; } ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); - int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; + int remainingSearchSpaceLength = searchSpaceMinusValueTailLength; while (remainingSearchSpaceLength > 0) { // Do a quick search for the first element of "value". - int relativeIndex = IndexOf(ref Unsafe.Add(ref searchSpace, index), valueHead, remainingSearchSpaceLength); + int relativeIndex = IndexOf(ref Unsafe.Add(ref searchSpace, offset), valueHead, remainingSearchSpaceLength); if (relativeIndex < 0) break; remainingSearchSpaceLength -= relativeIndex; - index += relativeIndex; + offset += relativeIndex; if (remainingSearchSpaceLength <= 0) break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. // Found the first element of "value". See if the tail matches. if (SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + 1)), ref valueTail, (nuint)(uint)valueTailLength * 2)) { - return index; // The tail matched. Return a successful find. + return offset; // The tail matched. Return a successful find. 
} remainingSearchSpaceLength--; - index++; + offset++; } return -1; // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 SEARCH_TWO_CHARS: - if (Avx2.IsSupported && searchSpaceLength - valueTailLength - Vector256.Count >= 0) + if (Avx2.IsSupported && searchSpaceMinusValueTailLength - Vector256.Count >= 0) { // Find the last unique (which is not equal to ch1) character // the algorithm is fine if both are equal, just a little bit less efficient @@ -82,18 +82,13 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), Vector256 ch1 = Vector256.Create((ushort)valueHead); Vector256 ch2 = Vector256.Create(ch2Val); - // Subtract Vector256.Count in order to make a fast loop where we will never - // cross boundaries, we'll handle the last chunk separately - int lengthToExamine = searchSpaceLength - valueTailLength - Vector256.Count; - Debug.Assert(lengthToExamine >= 0); - do { // Make sure we don't go out of bounds - Debug.Assert(index + ch1ch2Distance + Vector256.Count <= searchSpaceLength); + Debug.Assert(offset + ch1ch2Distance + Vector256.Count <= searchSpaceLength); - Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, index)); - Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance)); + Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, offset)); + Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, offset + ch1ch2Distance)); Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); // Early out: cmpAnd is all zeros @@ -107,10 +102,10 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), int charPos = (int)((uint)bitPos / 2); if (valueLength == 2 || // we already matched two chars SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), + ref Unsafe.As(ref 
Unsafe.Add(ref searchSpace, offset + charPos)), ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { - return index + charPos; + return offset + charPos; } // Clear two the lowest set bits @@ -120,50 +115,15 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) mask &= ~(uint)(0b11 << bitPos); } while (mask != 0); } - index += Vector256.Count; - } while (index < lengthToExamine); + offset += Vector256.Count; - // Handle the last Vector256.Count chunk we previously subtracted - // We might overlap with the previously processed data - { - if (index == searchSpaceLength - valueTailLength) + if (offset == searchSpaceMinusValueTailLength) return -1; - index = searchSpaceLength - valueTailLength - Vector256.Count; - // Make sure we don't go out of bounds - Debug.Assert(index + ch1ch2Distance + Vector256.Count <= searchSpaceLength); - - Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, index)); - Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, index + ch1ch2Distance)); - Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - - // Early out: cmpAnd is all zeros - if (cmpAnd == Vector256.Zero) - return -1; - - uint mask = cmpAnd.ExtractMostSignificantBits(); - do - { - int bitPos = BitOperations.TrailingZeroCount(mask); - // div by 2 (shr) because we work with 2-byte chars - int charPos = (int)((uint)bitPos / 2); - if (valueLength == 2 || // we already matched two chars - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) - { - return index + charPos; - } - - // Clear two the lowest set bits - if (Bmi1.IsSupported) - mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); - else - mask &= ~(uint)(0b11 << bitPos); - } while (mask != 0); - - return -1; - } + // Overlap with the current chunk for trailing elements + if (offset > searchSpaceMinusValueTailLength - Vector256.Count) + offset = searchSpaceMinusValueTailLength - 
Vector256.Count; + } while (true); } else // 128bit vector path (SSE2 or AdvSimd) { @@ -177,21 +137,16 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) Vector128 ch1 = Vector128.Create((ushort)valueHead); Vector128 ch2 = Vector128.Create(ch2Val); - // Subtract Vector128.Count in order to make a fast loop where we will never - // cross boundaries, we'll handle the last chunk separately - int lengthToExamine = searchSpaceLength - valueTailLength - Vector128.Count; - Debug.Assert(lengthToExamine >= 0); do { // Make sure we don't go out of bounds - Debug.Assert(index + ch1ch2Distance + Vector128.Count <= searchSpaceLength); + Debug.Assert(offset + ch1ch2Distance + Vector128.Count <= searchSpaceLength); - Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, index)); - Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance)); + Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, offset)); + Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, offset + ch1ch2Distance)); Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); // Early out: cmpAnd is all zeros - // it's especially important for ARM where ExtractMostSignificantBits is not cheap if (cmpAnd != Vector128.Zero) { uint mask = cmpAnd.ExtractMostSignificantBits(); @@ -202,10 +157,10 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) int charPos = (int)((uint)bitPos / 2); if (valueLength == 2 || // we already matched two chars SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) { - return index + charPos; + return offset + charPos; } // Clear two lowest set bits @@ -215,50 +170,15 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) mask &= ~(uint)(0b11 << bitPos); } while (mask != 0); } - index += Vector128.Count; - } while (index < 
lengthToExamine); - - // Handle the last Vector128.Count chunk we previously subtracted - // We might overlap with the previously processed data - { - if (index == searchSpaceLength - valueTailLength) - return -1; - index = searchSpaceLength - valueTailLength - Vector128.Count; + offset += Vector128.Count; - // Make sure we don't go out of bounds - Debug.Assert(index + ch1ch2Distance + Vector128.Count <= searchSpaceLength); - - Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, index)); - Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, index + ch1ch2Distance)); - Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); - - // Early out: cmpAnd is all zeros - if (cmpAnd == Vector128.Zero) + if (offset == searchSpaceMinusValueTailLength) return -1; - uint mask = cmpAnd.ExtractMostSignificantBits(); - do - { - int bitPos = BitOperations.TrailingZeroCount(mask); - // div by 2 (shr) because we work with 2-byte chars - int charPos = (int)((uint)bitPos / 2); - if (valueLength == 2 || // we already matched two chars - SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + charPos)), - ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) - { - return index + charPos; - } - - // Clear two the lowest set bits - if (Bmi1.IsSupported) - mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); - else - mask &= ~(uint)(0b11 << bitPos); - } while (mask != 0); - - return -1; - } + // Overlap with the current chunk for trailing elements + if (offset > searchSpaceMinusValueTailLength - Vector128.Count) + offset = searchSpaceMinusValueTailLength - Vector128.Count; + } while (true); } } @@ -276,8 +196,8 @@ public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref c int offset = 0; char valueHead = value; - - if (Vector128.IsHardwareAccelerated && searchSpaceLength - valueTailLength >= Vector128.Count) + int searchSpaceMinusValueTailLength = searchSpaceLength - valueTailLength; + if 
(Vector128.IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector128.Count) { goto SEARCH_TWO_CHARS; } @@ -311,9 +231,9 @@ ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 SEARCH_TWO_CHARS: - if (Avx2.IsSupported && searchSpaceLength - valueTailLength >= Vector256.Count) + if (Avx2.IsSupported && searchSpaceMinusValueTailLength >= Vector256.Count) { - offset = searchSpaceLength - valueTailLength - Vector256.Count; + offset = searchSpaceMinusValueTailLength - Vector256.Count; // Find the last unique (which is not equal to ch1) char // the algorithm is fine if both are equal, just a little bit less efficient @@ -363,7 +283,7 @@ ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) } else // 128bit vector path (SSE2 or AdvSimd) { - offset = searchSpaceLength - valueTailLength - Vector128.Count; + offset = searchSpaceMinusValueTailLength - Vector128.Count; // Find the last unique (which is not equal to ch1) char // the algorithm is fine if both are equal, just a little bit less efficient From 7c55951ead3cb5ac803c0c53bdac8ffffd397a67 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 22 Jan 2022 13:32:09 +0300 Subject: [PATCH 35/39] Add a test --- .../tests/Span/IndexOfSequence.byte.cs | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs index 33250569da2da8..49dbd776fa2e6f 100644 --- a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs +++ b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs @@ -115,5 +115,116 @@ public static void IndexOfSequenceLengthOneValueJustPasttVeryEnd_Byte() int index = span.IndexOf(value); Assert.Equal(-1, index); } + 
+ public static IEnumerable IndexOfSubSeqData() + { + // searchSpace, value, expected IndexOf value, expected LastIndexOf value + yield return new object[] { "11111", "111", 0, 2 }; + yield return new object[] { "1111111111", "1x1", -1, -1 }; + yield return new object[] { "1111111111", "111", 0, 7 }; + yield return new object[] { "11111111111x12111", "1x121", 10, 10 }; + yield return new object[] { "11111111111x12111", "11121", -1, -1 }; + yield return new object[] { "1111111111x121111", "11121", -1, -1 }; + yield return new object[] { "11111x12111111111", "11121", -1, -1 }; + yield return new object[] { "11111111111x12111", "1x211", -1, -1 }; + yield return new object[] { "11111111111x12111", "11211", -1, -1 }; + yield return new object[] { "1111111111x121111", "11211", -1, -1 }; + yield return new object[] { "11111x12111111111", "11211", -1, -1 }; + yield return new object[] { "11111111111x12111", "12111", 12, 12 }; + yield return new object[] { "1111111111x121111", "12111", 11, 11 }; + yield return new object[] { "11111x12111111111", "12111", 6, 6 }; + yield return new object[] { "1111x1211111111111x12", "11121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "11121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1211211", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; + yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; + yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111", 0, 44 }; + yield return new object[] { 
"1111x1211111111111x12111122131221221211221111112121121", "1111", 0, 43 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111", 7, 42 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111", 7, 41 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111", 7, 11 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111", 7, 10 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111", 7, 9 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111", 7, 7 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1211", 5, 48 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11121", 44, 44 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "121111", 5, 19 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "12111211", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111", 7, 11 }; + yield return new object[] { 
"1111x1211111111111x12111122131221221211221111112121121", "1121121111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111211111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111211111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1121111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11122111112111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111211111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111211111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111211111121111111", -1, -1 }; + yield return new object[] { "жжжжжжжжжжжжжж", "жжж", 0, 11 }; + yield return new object[] { "жжжжжжжжжжжжжжжжжжжжжжжжжжжж", "ж0ж", -1, -1 }; + yield return new object[] { "жжжжжаааааааааааааааччччс", "ччччс", 20, 20 }; + yield return new object[] { "жжжжжаааааааааааааааччччсссссссчччч", "чччч", 20, 31 }; + yield return new object[] { "жжжжжжжжжжжжжжжжжжжжжжжжжжжж", "1112", -1, -1 }; + yield return new object[] { "0уза0оцущ0оаз0щцуоазщцуо0азщцуоазщоц0узозцуоазуоцз0щауцз0оазцо", "0оаз0", 9, 9 }; + yield return new object[] { "abababababababababababababababbc", "bb", 29, 29 }; + yield return new object[] { "abababababababababababababababb", "bb", 29, 29 }; + yield return new object[] { "abababababababababababababababbc", "bb", 29, 29 }; + yield return new object[] { "babababababababababababababababc", "bb", -1, -1 }; + yield return new object[] { "abababababababababababababababbb", "bbb", 29, 29 }; + yield return new object[] { "abababababababababababababababbbc", "bbb", 29, 29 }; + yield return new object[] { "bbbbabababababababababababababababc", "bbb", 0, 1 }; + yield return new 
object[] { "abababababababababababababababbc", "aa", -1, -1 }; + yield return new object[] { "abababababababababababababababb", "aa", -1, -1 }; + yield return new object[] { "abababababababababababababababbc", "aa", -1, -1 }; + yield return new object[] { "babababababababababababababababc", "aa", -1, -1 }; + yield return new object[] { "abababababababababababababababbb", "aaa", -1, -1 }; + yield return new object[] { "abababababababababababababababbbc", "aaa", -1, -1 }; + yield return new object[] { "bbbbabababababababababababababababc", "aaa", -1, -1 }; + yield return new object[] { "ababababababababababababababababbc", "abaa", -1, -1 }; + yield return new object[] { "babbbabababababababababababababababc", "babb", 0, 0 }; + yield return new object[] { "babbbabababababababababababababababc", "сaсс", -1, -1 }; + yield return new object[] { "babbbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 15 }; + yield return new object[] { "babbbbbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 17 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 18, 32 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "bbbbbbbbbbbbb", 20, 20 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbb", -1, -1 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbb", 0, 0 }; + yield return 
new object[] { "xxxxxxxxxxxxxxbabbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbxxxxxxxxxxxxxxx", "xxxxxxxxxxxxxxx", 60, 60 }; + yield return new object[] { "xxxxxxxxxxxxxxxbabbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbxxxxxxxxxxxxxx", "xxxxxxxxxxxxxxx", 0, 0 }; + } + + [Theory] + [MemberData(nameof(IndexOfSubSeqData))] + public void ValueStartsAndEndsWithTheSameChars(string searchSpace, string value, int expectedIndexOfValue, int expectedLastIndexOfValue) + { + ReadOnlySpan searchSpaceSpan = searchSpace; + ReadOnlySpan valueSpan = value; + + Assert.Equal(expectedIndexOfValue, searchSpaceSpan.IndexOf(valueSpan)); + Assert.Equal(expectedLastIndexOfValue, searchSpaceSpan.LastIndexOf(valueSpan)); + + ReadOnlySpan byteSearchSpaceSpan = MemoryMarshal.Cast(searchSpaceSpan); + ReadOnlySpan byteValueSpan = MemoryMarshal.Cast(valueSpan); + + int expectedIndexOfValueByte = expectedIndexOfValue == -1 ? -1 : expectedIndexOfValue * 2; + int expectedLastIndexOfValueByte = expectedLastIndexOfValue == -1 ? -1 : expectedLastIndexOfValue * 2; + + Assert.Equal(expectedIndexOfValueByte, byteSearchSpaceSpan.IndexOf(byteValueSpan)); + Assert.Equal(expectedLastIndexOfValueByte, byteSearchSpaceSpan.LastIndexOf(byteValueSpan)); + } } } From 3f0b4c3d618faae4d48d0292a259990360e8474e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 22 Jan 2022 15:39:54 +0300 Subject: [PATCH 36/39] Fix build --- .../System.Memory/tests/Span/IndexOfSequence.byte.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs index 49dbd776fa2e6f..a022eed8fbe680 100644 --- a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs +++ b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using Xunit; +using System.Collections.Generic; +using System.Runtime.InteropServices; namespace System.SpanTests { @@ -209,7 +211,7 @@ public static IEnumerable IndexOfSubSeqData() [Theory] [MemberData(nameof(IndexOfSubSeqData))] - public void ValueStartsAndEndsWithTheSameChars(string searchSpace, string value, int expectedIndexOfValue, int expectedLastIndexOfValue) + public static void ValueStartsAndEndsWithTheSameChars(string searchSpace, string value, int expectedIndexOfValue, int expectedLastIndexOfValue) { ReadOnlySpan searchSpaceSpan = searchSpace; ReadOnlySpan valueSpan = value; From 48f4fc700435c2ef0420f1c1b1dc3f9ae7371707 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 25 Jan 2022 01:43:49 +0300 Subject: [PATCH 37/39] Add byte-specific test --- .../tests/Span/IndexOfSequence.byte.cs | 192 ++++++++---------- .../tests/Span/IndexOfSequence.char.cs | 99 +++++++++ 2 files changed, 189 insertions(+), 102 deletions(-) diff --git a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs index a022eed8fbe680..d100e0a18dc61c 100644 --- a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs +++ b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs @@ -117,116 +117,104 @@ public static void IndexOfSequenceLengthOneValueJustPasttVeryEnd_Byte() int index = span.IndexOf(value); Assert.Equal(-1, index); } - + public static IEnumerable IndexOfSubSeqData() { // searchSpace, value, expected IndexOf value, expected LastIndexOf value - yield return new object[] { "11111", "111", 0, 2 }; - yield return new object[] { "1111111111", "1x1", -1, -1 }; - yield return new object[] { "1111111111", "111", 0, 7 }; - yield return new object[] { "11111111111x12111", "1x121", 10, 10 }; - yield return new object[] { "11111111111x12111", "11121", -1, -1 }; - yield return new object[] { "1111111111x121111", "11121", -1, -1 }; - yield return new object[] { "11111x12111111111", "11121", -1, -1 }; - 
yield return new object[] { "11111111111x12111", "1x211", -1, -1 }; - yield return new object[] { "11111111111x12111", "11211", -1, -1 }; - yield return new object[] { "1111111111x121111", "11211", -1, -1 }; - yield return new object[] { "11111x12111111111", "11211", -1, -1 }; - yield return new object[] { "11111111111x12111", "12111", 12, 12 }; - yield return new object[] { "1111111111x121111", "12111", 11, 11 }; - yield return new object[] { "11111x12111111111", "12111", 6, 6 }; - yield return new object[] { "1111x1211111111111x12", "11121", -1, -1 }; - yield return new object[] { "1111x1211111111111x12", "11121", -1, -1 }; - yield return new object[] { "1111x1211111111111x12", "111121", -1, -1 }; - yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; - yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; - yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; - yield return new object[] { "1111x1211111111111x12", "1211211", -1, -1 }; - yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; - yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; - yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111", 0, 44 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111", 0, 43 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111", 7, 42 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111", 7, 41 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111", 7, 11 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111", 7, 10 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111", 7, 9 }; - yield return 
new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111", 7, 7 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111111111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111111111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111111111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1211", 5, 48 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11121", 44, 44 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "121111", 5, 19 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "12111211", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111", 7, 11 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1121121111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111211111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111211111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1121111111111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11122111112111111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", 
"1111111211111111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111211111111111111", -1, -1 }; - yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111211111121111111", -1, -1 }; - yield return new object[] { "жжжжжжжжжжжжжж", "жжж", 0, 11 }; - yield return new object[] { "жжжжжжжжжжжжжжжжжжжжжжжжжжжж", "ж0ж", -1, -1 }; - yield return new object[] { "жжжжжаааааааааааааааччччс", "ччччс", 20, 20 }; - yield return new object[] { "жжжжжаааааааааааааааччччсссссссчччч", "чччч", 20, 31 }; - yield return new object[] { "жжжжжжжжжжжжжжжжжжжжжжжжжжжж", "1112", -1, -1 }; - yield return new object[] { "0уза0оцущ0оаз0щцуоазщцуо0азщцуоазщоц0узозцуоазуоцз0щауцз0оазцо", "0оаз0", 9, 9 }; - yield return new object[] { "abababababababababababababababbc", "bb", 29, 29 }; - yield return new object[] { "abababababababababababababababb", "bb", 29, 29 }; - yield return new object[] { "abababababababababababababababbc", "bb", 29, 29 }; - yield return new object[] { "babababababababababababababababc", "bb", -1, -1 }; - yield return new object[] { "abababababababababababababababbb", "bbb", 29, 29 }; - yield return new object[] { "abababababababababababababababbbc", "bbb", 29, 29 }; - yield return new object[] { "bbbbabababababababababababababababc", "bbb", 0, 1 }; - yield return new object[] { "abababababababababababababababbc", "aa", -1, -1 }; - yield return new object[] { "abababababababababababababababb", "aa", -1, -1 }; - yield return new object[] { "abababababababababababababababbc", "aa", -1, -1 }; - yield return new object[] { "babababababababababababababababc", "aa", -1, -1 }; - yield return new object[] { "abababababababababababababababbb", "aaa", -1, -1 }; - yield return new object[] { "abababababababababababababababbbc", "aaa", -1, -1 }; - yield return new object[] { "bbbbabababababababababababababababc", "aaa", -1, -1 }; - yield return new object[] { "ababababababababababababababababbc", 
"abaa", -1, -1 }; - yield return new object[] { "babbbabababababababababababababababc", "babb", 0, 0 }; - yield return new object[] { "babbbabababababababababababababababc", "сaсс", -1, -1 }; - yield return new object[] { "babbbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 0 }; - yield return new object[] { "babbbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 15 }; - yield return new object[] { "babbbbbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 17 }; - yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 18, 32 }; - yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "bbbbbbbbbbbbb", 20, 20 }; - yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", 0, 0 }; - yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbb", 0, 0 }; - yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbb", -1, -1 }; - yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", 0, 0 }; - yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbb", 0, 0 }; - yield return new object[] { "xxxxxxxxxxxxxxbabbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbxxxxxxxxxxxxxxx", "xxxxxxxxxxxxxxx", 60, 60 }; - yield return new object[] { "xxxxxxxxxxxxxxxbabbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbxxxxxxxxxxxxxx", "xxxxxxxxxxxxxxx", 0, 0 }; + yield return new object[] { new byte[]{0,0,0,0,0},new byte[]{0,0,0}, 0, 2}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0},new byte[]{0,71,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0},new byte[]{0,0,0}, 0, 7}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0},new byte[]{0,71,0,1,0}, 10, 10}; + 
yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0},new byte[]{0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0},new byte[]{0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0},new byte[]{0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0},new byte[]{0,71,1,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0},new byte[]{0,0,1,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0},new byte[]{0,0,1,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0},new byte[]{0,0,1,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0},new byte[]{0,1,0,0,0}, 12, 12}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0},new byte[]{0,1,0,0,0}, 11, 11}; + yield return new object[] { new byte[]{0,0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0},new byte[]{0,1,0,0,0}, 6, 6}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,1,0,0,1,0,0}, -1, -1}; + yield return new object[] { new 
byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,1,0,0,0,0,0}, 5, 5}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,1,0,0,0,0,0}, 5, 5}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,1,0,0,0,0,0}, 5, 5}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0}, 0, 44}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0}, 0, 43}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0}, 7, 42}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0}, 7, 41}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0}, 7, 11}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0}, 7, 10}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0}, 7, 9}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0}, 7, 7}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new 
byte[]{0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,1,0,0}, 5, 48}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,1,0}, 44, 44}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,1,0,0,0,0}, 5, 19}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,1,0,0,0,1,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0}, 7, 11}; + 
yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,1,0,0,1,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,1,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,1,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,1,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133},new byte[]{159,133,159,133,159,133}, 0, 22}; + yield return new object[] { new 
byte[]{159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133},new byte[]{159,133,255,159,133}, -1, -1}; + yield return new object[] { new byte[]{159,133,159,133,159,133,159,133,159,133,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,160,86,160,86,160,86,160,86,160,80},new byte[]{160,86,160,86,160,86,160,86,160,80}, 40, 40}; + yield return new object[] { new byte[]{159,133,159,133,159,133,159,133,159,133,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,160,86,160,86,160,86,160,86,160,80,160,80,160,80,160,80,160,80,160,80,160,80,160,86,160,86,160,86,160,86},new byte[]{160,86,160,86,160,86,160,86}, 40, 62}; + yield return new object[] { new byte[]{159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133},new byte[]{0,0,0,1}, -1, -1}; + yield return new object[] { new byte[]{255,160,82,159,134,159,127,255,159,141,160,85,160,82,160,88,255,159,141,159,127,159,134,255,160,88,160,85,160,82,159,141,159,127,159,134,160,88,160,85,160,82,159,141,255,159,127,159,134,160,88,160,85,160,82,159,141,159,127,159,134,160,88,159,141,160,85,255,160,82,159,134,159,141,159,134,160,85,160,82,159,141,159,127,159,134,160,82,159,141,160,85,159,134,255,160,88,159,127,160,82,160,85,159,134,255,159,141,159,127,159,134,160,85,159,141},new byte[]{255,159,141,159,127,159,134,255}, 16, 16}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,50},new byte[]{49,49}, 29, 29}; + yield return new object[] { new 
byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49},new byte[]{49,49}, 29, 29}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,50},new byte[]{49,49}, 29, 29}; + yield return new object[] { new byte[]{49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{49,49}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,49},new byte[]{49,49,49}, 29, 29}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,49,50},new byte[]{49,49,49}, 29, 29}; + yield return new object[] { new byte[]{49,49,49,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{49,49,49}, 0, 1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,50},new byte[]{48,48}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49},new byte[]{48,48}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,50},new byte[]{48,48}, -1, -1}; + yield return new object[] { new byte[]{49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{48,48}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,49},new byte[]{48,48,48}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,49,50},new byte[]{48,48,48}, -1, -1}; + yield return new 
object[] { new byte[]{49,49,49,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{48,48,48}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,50},new byte[]{48,49,48,48}, -1, -1}; + yield return new object[] { new byte[]{49,48,49,49,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{49,48,49,49}, 0, 0}; + yield return new object[] { new byte[]{49,48,49,49,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{160,80,48,160,80,160,80}, -1, -1}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 0, 0}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 0, 15}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 0, 17}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 18, 32}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,49,49,49,49,49,49,49,49,49,49,49,49}, 20, 20}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new 
byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 0, 0}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49}, 0, 0}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49}, -1, -1}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 0, 0}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49}, 0, 0}; + yield return new object[] { new byte[]{71,71,71,71,71,71,71,71,71,71,71,71,71,71,49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71},new byte[]{71,71,71,71,71,71,71,71,71,71,71,71,71,71,71}, 60, 60}; + yield return new object[] { new 
byte[]{71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,71,71,71,71,71,71,71,71,71,71,71,71,71,71},new byte[]{71,71,71,71,71,71,71,71,71,71,71,71,71,71,71}, 0, 0}; } [Theory] [MemberData(nameof(IndexOfSubSeqData))] - public static void ValueStartsAndEndsWithTheSameChars(string searchSpace, string value, int expectedIndexOfValue, int expectedLastIndexOfValue) + public static void ValueStartsAndEndsWithTheSameChars(byte[] searchSpace, byte[] value, int expectedIndexOfValue, int expectedLastIndexOfValue) { - ReadOnlySpan searchSpaceSpan = searchSpace; - ReadOnlySpan valueSpan = value; - - Assert.Equal(expectedIndexOfValue, searchSpaceSpan.IndexOf(valueSpan)); - Assert.Equal(expectedLastIndexOfValue, searchSpaceSpan.LastIndexOf(valueSpan)); - - ReadOnlySpan byteSearchSpaceSpan = MemoryMarshal.Cast(searchSpaceSpan); - ReadOnlySpan byteValueSpan = MemoryMarshal.Cast(valueSpan); - - int expectedIndexOfValueByte = expectedIndexOfValue == -1 ? -1 : expectedIndexOfValue * 2; - int expectedLastIndexOfValueByte = expectedLastIndexOfValue == -1 ? 
-1 : expectedLastIndexOfValue * 2; - - Assert.Equal(expectedIndexOfValueByte, byteSearchSpaceSpan.IndexOf(byteValueSpan)); - Assert.Equal(expectedLastIndexOfValueByte, byteSearchSpaceSpan.LastIndexOf(byteValueSpan)); + Assert.Equal(expectedIndexOfValue, searchSpace.AsSpan().IndexOf(value)); + Assert.Equal(expectedLastIndexOfValue, searchSpace.AsSpan().LastIndexOf(value)); } } } diff --git a/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs b/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs index bda626153a5d09..e5ebfe4bfdd89e 100644 --- a/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs +++ b/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs @@ -115,5 +115,104 @@ public static void IndexOfSequenceLengthOneValueJustPasttVeryEnd_Char() int index = span.IndexOf(value); Assert.Equal(-1, index); } + + public static IEnumerable IndexOfSubSeqData() + { + // searchSpace, value, expected IndexOf value, expected LastIndexOf value + yield return new object[] { "11111", "111", 0, 2 }; + yield return new object[] { "1111111111", "1x1", -1, -1 }; + yield return new object[] { "1111111111", "111", 0, 7 }; + yield return new object[] { "11111111111x12111", "1x121", 10, 10 }; + yield return new object[] { "11111111111x12111", "11121", -1, -1 }; + yield return new object[] { "1111111111x121111", "11121", -1, -1 }; + yield return new object[] { "11111x12111111111", "11121", -1, -1 }; + yield return new object[] { "11111111111x12111", "1x211", -1, -1 }; + yield return new object[] { "11111111111x12111", "11211", -1, -1 }; + yield return new object[] { "1111111111x121111", "11211", -1, -1 }; + yield return new object[] { "11111x12111111111", "11211", -1, -1 }; + yield return new object[] { "11111111111x12111", "12111", 12, 12 }; + yield return new object[] { "1111111111x121111", "12111", 11, 11 }; + yield return new object[] { "11111x12111111111", "12111", 6, 6 }; + yield return new object[] { "1111x1211111111111x12", "11121", -1, 
-1 }; + yield return new object[] { "1111x1211111111111x12", "11121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1211211", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; + yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; + yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111", 0, 44 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111", 0, 43 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111", 7, 42 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111", 7, 41 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111", 7, 11 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111", 7, 10 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111", 7, 9 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111", 7, 7 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111111", -1, -1 
}; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1211", 5, 48 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11121", 44, 44 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "121111", 5, 19 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "12111211", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111", 7, 11 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1121121111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111211111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111211111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1121111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11122111112111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111211111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111211111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111211111121111111", -1, -1 }; + yield return new object[] { "жжжжжжжжжжжжжж", "жжж", 0, 11 }; + yield return new object[] { "жжжжжжжжжжжжжжжжжжжжжжжжжжжж", "ж0ж", -1, -1 }; + yield return new object[] { "жжжжжаааааааааааааааччччс", "ччччс", 20, 20 }; + yield return new object[] { 
"жжжжжаааааааааааааааччччсссссссчччч", "чччч", 20, 31 }; + yield return new object[] { "жжжжжжжжжжжжжжжжжжжжжжжжжжжж", "1112", -1, -1 }; + yield return new object[] { "0уза0оцущ0оаз0щцуоазщцуо0азщцуоазщоц0узозцуоазуоцз0щауцз0оазцо", "0оаз0", 9, 9 }; + yield return new object[] { "abababababababababababababababbc", "bb", 29, 29 }; + yield return new object[] { "abababababababababababababababb", "bb", 29, 29 }; + yield return new object[] { "abababababababababababababababbc", "bb", 29, 29 }; + yield return new object[] { "babababababababababababababababc", "bb", -1, -1 }; + yield return new object[] { "abababababababababababababababbb", "bbb", 29, 29 }; + yield return new object[] { "abababababababababababababababbbc", "bbb", 29, 29 }; + yield return new object[] { "bbbbabababababababababababababababc", "bbb", 0, 1 }; + yield return new object[] { "abababababababababababababababbc", "aa", -1, -1 }; + yield return new object[] { "abababababababababababababababb", "aa", -1, -1 }; + yield return new object[] { "abababababababababababababababbc", "aa", -1, -1 }; + yield return new object[] { "babababababababababababababababc", "aa", -1, -1 }; + yield return new object[] { "abababababababababababababababbb", "aaa", -1, -1 }; + yield return new object[] { "abababababababababababababababbbc", "aaa", -1, -1 }; + yield return new object[] { "bbbbabababababababababababababababc", "aaa", -1, -1 }; + yield return new object[] { "ababababababababababababababababbc", "abaa", -1, -1 }; + yield return new object[] { "babbbabababababababababababababababc", "babb", 0, 0 }; + yield return new object[] { "babbbabababababababababababababababc", "сaсс", -1, -1 }; + yield return new object[] { "babbbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 15 }; + yield return new object[] { "babbbbbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 17 }; + yield return new object[] { 
"babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 18, 32 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "bbbbbbbbbbbbb", 20, 20 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbb", -1, -1 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbb", 0, 0 }; + yield return new object[] { "xxxxxxxxxxxxxxbabbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbxxxxxxxxxxxxxxx", "xxxxxxxxxxxxxxx", 60, 60 }; + yield return new object[] { "xxxxxxxxxxxxxxxbabbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbxxxxxxxxxxxxxx", "xxxxxxxxxxxxxxx", 0, 0 }; + } + + [Theory] + [MemberData(nameof(IndexOfSubSeqData))] + public static void ValueStartsAndEndsWithTheSameChars(string searchSpace, string value, int expectedIndexOfValue, int expectedLastIndexOfValue) + { + Assert.Equal(expectedIndexOfValue, searchSpace.AsSpan().IndexOf(value)); + Assert.Equal(expectedLastIndexOfValue, searchSpace.AsSpan().LastIndexOf(value)); + } } } From 7c3b83487f20d2a7e836277e6a1fa2314d6297a5 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 25 Jan 2022 15:12:20 +0300 Subject: [PATCH 38/39] Fix build --- .../System.Memory/tests/Span/IndexOfSequence.byte.cs | 4 ++-- .../System.Memory/tests/Span/IndexOfSequence.char.cs | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs 
index d100e0a18dc61c..8798b4085c2969 100644 --- a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs +++ b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs @@ -118,7 +118,7 @@ public static void IndexOfSequenceLengthOneValueJustPasttVeryEnd_Byte() Assert.Equal(-1, index); } - public static IEnumerable IndexOfSubSeqData() + public static IEnumerable IndexOfSubSeqData_Byte() { // searchSpace, value, expected IndexOf value, expected LastIndexOf value yield return new object[] { new byte[]{0,0,0,0,0},new byte[]{0,0,0}, 0, 2}; @@ -211,7 +211,7 @@ public static IEnumerable IndexOfSubSeqData() [Theory] [MemberData(nameof(IndexOfSubSeqData))] - public static void ValueStartsAndEndsWithTheSameChars(byte[] searchSpace, byte[] value, int expectedIndexOfValue, int expectedLastIndexOfValue) + public static void ValueStartsAndEndsWithTheSameBytes(byte[] searchSpace, byte[] value, int expectedIndexOfValue, int expectedLastIndexOfValue) { Assert.Equal(expectedIndexOfValue, searchSpace.AsSpan().IndexOf(value)); Assert.Equal(expectedLastIndexOfValue, searchSpace.AsSpan().LastIndexOf(value)); diff --git a/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs b/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs index e5ebfe4bfdd89e..b341a79c9b1f33 100644 --- a/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs +++ b/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using Xunit; +using System.Collections.Generic; +using System.Runtime.InteropServices; namespace System.SpanTests { @@ -116,7 +118,7 @@ public static void IndexOfSequenceLengthOneValueJustPasttVeryEnd_Char() Assert.Equal(-1, index); } - public static IEnumerable IndexOfSubSeqData() + public static IEnumerable IndexOfSubSeqData_Char() { // searchSpace, value, expected IndexOf value, expected LastIndexOf value yield return new object[] { "11111", "111", 0, 2 }; @@ -208,7 +210,7 @@ public static IEnumerable IndexOfSubSeqData() } [Theory] - [MemberData(nameof(IndexOfSubSeqData))] + [MemberData(nameof(IndexOfSubSeqData_Char))] public static void ValueStartsAndEndsWithTheSameChars(string searchSpace, string value, int expectedIndexOfValue, int expectedLastIndexOfValue) { Assert.Equal(expectedIndexOfValue, searchSpace.AsSpan().IndexOf(value)); From c68a07a6a6f2d235938d0f47a26f0b6d3a281060 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 25 Jan 2022 16:01:35 +0300 Subject: [PATCH 39/39] Update IndexOfSequence.byte.cs --- src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs index 8798b4085c2969..1a9027d0fc7447 100644 --- a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs +++ b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs @@ -210,7 +210,7 @@ public static IEnumerable IndexOfSubSeqData_Byte() } [Theory] - [MemberData(nameof(IndexOfSubSeqData))] + [MemberData(nameof(IndexOfSubSeqData_Byte))] public static void ValueStartsAndEndsWithTheSameBytes(byte[] searchSpace, byte[] value, int expectedIndexOfValue, int expectedLastIndexOfValue) { Assert.Equal(expectedIndexOfValue, searchSpace.AsSpan().IndexOf(value));