Skip to content

Commit

Permalink
Vectorize CharacterMap.IndexOfOpeningCharacter
Browse files Browse the repository at this point in the history
  • Loading branch information
MihaZupan committed Apr 15, 2022
1 parent ccf455d commit da756f4
Showing 1 changed file with 85 additions and 0 deletions.
85 changes: 85 additions & 0 deletions src/Markdig/Helpers/CharacterMap.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
#if NETCOREAPP3_1_OR_GREATER
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace Markdig.Helpers
{
Expand All @@ -16,6 +21,10 @@ namespace Markdig.Helpers
/// <typeparam name="T"></typeparam>
public sealed class CharacterMap<T> where T : class
{
#if NETCOREAPP3_1_OR_GREATER
private readonly Vector128<byte> _asciiBitmap;
#endif

private readonly T[] asciiMap;
private readonly Dictionary<uint, T>? nonAsciiMap;
private readonly BoolVector128 isOpeningCharacter;
Expand All @@ -39,6 +48,11 @@ public CharacterMap(IEnumerable<KeyValuePair<char, T>> maps)
if (openingChar < 128)
{
maxChar = Math.Max(maxChar, openingChar);

if (openingChar == 0)
{
ThrowHelper.ArgumentOutOfRangeException("Null is not a valid opening character.", nameof(maps));
}
}
else
{
Expand All @@ -64,6 +78,23 @@ public CharacterMap(IEnumerable<KeyValuePair<char, T>> maps)
nonAsciiMap[openingChar] = state.Value;
}
}

#if NETCOREAPP3_1_OR_GREATER
if (nonAsciiMap is null)
{
long bitmap_0_3 = 0;
long bitmap_4_7 = 0;

foreach (char openingChar in OpeningCharacters)
{
int position = (openingChar >> 4) | ((openingChar & 0x0F) << 3);
if (position < 64) bitmap_0_3 |= 1L << position;
else bitmap_4_7 |= 1L << (position - 64);
}

_asciiBitmap = Vector128.Create(bitmap_0_3, bitmap_4_7).AsByte();
}
#endif
}

/// <summary>
Expand Down Expand Up @@ -105,9 +136,63 @@ public T? this[uint openingChar]
/// <returns>Index position within the string of the first opening character found in the specified text; if not found, returns -1</returns>
public int IndexOfOpeningCharacter(string text, int start, int end)
{
Debug.Assert(text is not null);
Debug.Assert(start >= 0 && end >= 0);
Debug.Assert(end - start + 1 >= 0);
Debug.Assert(end - start + 1 <= text.Length);

if (nonAsciiMap is null)
{
#if NETCOREAPP3_1_OR_GREATER
if (Ssse3.IsSupported && BitConverter.IsLittleEndian)
{
// Based on http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm
// Optimized for sets in the [1, 127] range

int lengthMinusOne = end - start;
int charsToProcessVectorized = lengthMinusOne & ~(2 * Vector128<short>.Count - 1);
int finalStart = start + charsToProcessVectorized;

if (start < finalStart)
{
ref char textStartRef = ref Unsafe.Add(ref Unsafe.AsRef(in text.GetPinnableReference()), start);
Vector128<byte> bitmap = _asciiBitmap;
do
{
// Load 32 bytes (16 chars) into two Vector128<short>s (chars)
// Drop the high byte of each char
// Pack the remaining bytes into a single Vector128<byte>
Vector128<byte> input = Sse2.PackUnsignedSaturate(
Unsafe.ReadUnaligned<Vector128<short>>(ref Unsafe.As<char, byte>(ref textStartRef)),
Unsafe.ReadUnaligned<Vector128<short>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref textStartRef, Vector128<short>.Count))));

// Extract the higher nibble of each character ((input >> 4) & 0xF)
Vector128<byte> higherNibbles = Sse2.And(Sse2.ShiftRightLogical(input.AsUInt16(), 4).AsByte(), Vector128.Create((byte)0xF));

// Lookup the matching higher nibble for each character based on the lower nibble
// PSHUFB will set the result to 0 for any non-ASCII (> 127) character
Vector128<byte> bitsets = Ssse3.Shuffle(bitmap, input);

// Calculate a bitmask (1 << (higherNibble % 8)) for each character
Vector128<byte> bitmask = Ssse3.Shuffle(Vector128.Create(0x8040201008040201).AsByte(), higherNibbles);

// Check which characters are present in the set
// We are relying on bitsets being zero for non-ASCII characters
Vector128<byte> result = Sse2.And(bitsets, bitmask);

if (!result.Equals(Vector128<byte>.Zero))
{
int resultMask = ~Sse2.MoveMask(Sse2.CompareEqual(result, Vector128<byte>.Zero));
return start + BitOperations.TrailingZeroCount((uint)resultMask);
}

start += 2 * Vector128<short>.Count;
textStartRef = ref Unsafe.Add(ref textStartRef, 2 * Vector128<short>.Count);
}
while (start != finalStart);
}
}

ref char textRef = ref Unsafe.AsRef(in text.GetPinnableReference());
for (; start <= end; start++)
{
Expand Down

0 comments on commit da756f4

Please sign in to comment.