Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Add all integer overloads for AlignRight/BlendVariable and unsigned overloads for MultiplyLow #19420

Merged
merged 3 commits into from
Sep 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,54 @@ internal Avx2() { }
/// </summary>
public static Vector256<sbyte> AlignRight(Vector256<sbyte> left, Vector256<sbyte> right, byte mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<byte> AlignRight(Vector256<byte> left, Vector256<byte> right, byte mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: It might be nice to have a comment on these explaining that the actual instruction operates on bytes and that you should adjust the mask appropriately

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, will do.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
public static Vector256<short> AlignRight(Vector256<short> left, Vector256<short> right, byte mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<ushort> AlignRight(Vector256<ushort> left, Vector256<ushort> right, byte mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<int> AlignRight(Vector256<int> left, Vector256<int> right, byte mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<uint> AlignRight(Vector256<uint> left, Vector256<uint> right, byte mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<long> AlignRight(Vector256<long> left, Vector256<long> right, byte mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<ulong> AlignRight(Vector256<ulong> left, Vector256<ulong> right, byte mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_and_si256 (__m256i a, __m256i b)
/// VPAND ymm, ymm, ymm/m256
Expand Down Expand Up @@ -227,15 +275,54 @@ internal Avx2() { }

/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// </summary>
// Dropped the stale non-VEX "PBLENDVB" mnemonic line (diff residue): the 256-bit form is VEX-encoded, matching the V-prefixed mnemonics of every sibling entry.
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<sbyte> BlendVariable(Vector256<sbyte> left, Vector256<sbyte> right, Vector256<sbyte> mask) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// </summary>
// Dropped the stale non-VEX "PBLENDVB" mnemonic line (diff residue): the 256-bit form is VEX-encoded, matching the V-prefixed mnemonics of every sibling entry.
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<byte> BlendVariable(Vector256<byte> left, Vector256<byte> right, Vector256<byte> mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<short> BlendVariable(Vector256<short> left, Vector256<short> right, Vector256<short> mask) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<ushort> BlendVariable(Vector256<ushort> left, Vector256<ushort> right, Vector256<ushort> mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<int> BlendVariable(Vector256<int> left, Vector256<int> right, Vector256<int> mask) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<uint> BlendVariable(Vector256<uint> left, Vector256<uint> right, Vector256<uint> mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<long> BlendVariable(Vector256<long> left, Vector256<long> right, Vector256<long> mask) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<ulong> BlendVariable(Vector256<ulong> left, Vector256<ulong> right, Vector256<ulong> mask) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m128i _mm_broadcastb_epi8 (__m128i a)
/// VPBROADCASTB xmm, xmm
Expand Down Expand Up @@ -1149,11 +1236,22 @@ internal Avx2() { }
/// VPMULLW ymm, ymm, ymm/m256
/// </summary>
public static Vector256<short> MultiplyLow(Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256i _mm256_mullo_epi16 (__m256i a, __m256i b)
/// VPMULLW ymm, ymm, ymm/m256
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<ushort> MultiplyLow(Vector256<ushort> left, Vector256<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b)
/// VPMULLD ymm, ymm, ymm/m256
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<int> MultiplyLow(Vector256<int> left, Vector256<int> right) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b)
/// VPMULLD ymm, ymm, ymm/m256
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without AVX2 support.
public static Vector256<uint> MultiplyLow(Vector256<uint> left, Vector256<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m256i _mm256_or_si256 (__m256i a, __m256i b)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,54 @@ internal Avx2() { }
/// </summary>
public static Vector256<sbyte> AlignRight(Vector256<sbyte> left, Vector256<sbyte> right, byte mask) => AlignRight(left, right, mask);

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<byte> AlignRight(Vector256<byte> left, Vector256<byte> right, byte mask) => AlignRight(left, right, mask);

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<short> AlignRight(Vector256<short> left, Vector256<short> right, byte mask) => AlignRight(left, right, mask);

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<ushort> AlignRight(Vector256<ushort> left, Vector256<ushort> right, byte mask) => AlignRight(left, right, mask);

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<int> AlignRight(Vector256<int> left, Vector256<int> right, byte mask) => AlignRight(left, right, mask);

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<uint> AlignRight(Vector256<uint> left, Vector256<uint> right, byte mask) => AlignRight(left, right, mask);

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<long> AlignRight(Vector256<long> left, Vector256<long> right, byte mask) => AlignRight(left, right, mask);

/// <summary>
/// __m256i _mm256_alignr_epi8 (__m256i a, __m256i b, const int count)
/// VPALIGNR ymm, ymm, ymm/m256, imm8
/// This intrinsic generates VPALIGNR that operates over bytes rather than elements of the vectors.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<ulong> AlignRight(Vector256<ulong> left, Vector256<ulong> right, byte mask) => AlignRight(left, right, mask);

/// <summary>
/// __m256i _mm256_and_si256 (__m256i a, __m256i b)
/// VPAND ymm, ymm, ymm/m256
Expand Down Expand Up @@ -227,15 +275,54 @@ internal Avx2() { }

/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// </summary>
// Dropped the stale non-VEX "PBLENDVB" mnemonic line (diff residue): the 256-bit form is VEX-encoded, matching the V-prefixed mnemonics of every sibling entry.
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<sbyte> BlendVariable(Vector256<sbyte> left, Vector256<sbyte> right, Vector256<sbyte> mask) => BlendVariable(left, right, mask);
/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// </summary>
// Dropped the stale non-VEX "PBLENDVB" mnemonic line (diff residue): the 256-bit form is VEX-encoded, matching the V-prefixed mnemonics of every sibling entry.
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<byte> BlendVariable(Vector256<byte> left, Vector256<byte> right, Vector256<byte> mask) => BlendVariable(left, right, mask);

/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<short> BlendVariable(Vector256<short> left, Vector256<short> right, Vector256<short> mask) => BlendVariable(left, right, mask);
/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<ushort> BlendVariable(Vector256<ushort> left, Vector256<ushort> right, Vector256<ushort> mask) => BlendVariable(left, right, mask);

/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<int> BlendVariable(Vector256<int> left, Vector256<int> right, Vector256<int> mask) => BlendVariable(left, right, mask);
/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<uint> BlendVariable(Vector256<uint> left, Vector256<uint> right, Vector256<uint> mask) => BlendVariable(left, right, mask);

/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<long> BlendVariable(Vector256<long> left, Vector256<long> right, Vector256<long> mask) => BlendVariable(left, right, mask);
/// <summary>
/// __m256i _mm256_blendv_epi8 (__m256i a, __m256i b, __m256i mask)
/// VPBLENDVB ymm, ymm, ymm/m256, ymm
/// This intrinsic generates VPBLENDVB that needs a BYTE mask-vector, so users should correctly set each mask byte for the selected elements.
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<ulong> BlendVariable(Vector256<ulong> left, Vector256<ulong> right, Vector256<ulong> mask) => BlendVariable(left, right, mask);

/// <summary>
/// __m128i _mm_broadcastb_epi8 (__m128i a)
/// VPBROADCASTB xmm, xmm
Expand Down Expand Up @@ -1155,11 +1242,22 @@ public static Vector256<T> BroadcastScalarToVector256<T>(Vector128<T> value) whe
/// VPMULLW ymm, ymm, ymm/m256
/// </summary>
public static Vector256<short> MultiplyLow(Vector256<short> left, Vector256<short> right) => MultiplyLow(left, right);
/// <summary>
/// __m256i _mm256_mullo_epi16 (__m256i a, __m256i b)
/// VPMULLW ymm, ymm, ymm/m256
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<ushort> MultiplyLow(Vector256<ushort> left, Vector256<ushort> right) => MultiplyLow(left, right);

/// <summary>
/// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b)
/// VPMULLD ymm, ymm, ymm/m256
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<int> MultiplyLow(Vector256<int> left, Vector256<int> right) => MultiplyLow(left, right);
/// <summary>
/// __m256i _mm256_mullo_epi32 (__m256i a, __m256i b)
/// VPMULLD ymm, ymm, ymm/m256
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector256<uint> MultiplyLow(Vector256<uint> left, Vector256<uint> right) => MultiplyLow(left, right);

/// <summary>
/// __m256i _mm256_or_si256 (__m256i a, __m256i b)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,11 @@ internal Sse2() { }
/// PMULLW xmm, xmm/m128
/// </summary>
public static Vector128<short> MultiplyLow(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
/// <summary>
/// __m128i _mm_mullo_epi16 (__m128i a, __m128i b)
/// PMULLW xmm, xmm/m128
/// </summary>
// Software-fallback stub: unconditionally throws on platforms without SSE2 support.
public static Vector128<ushort> MultiplyLow(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// __m128i _mm_or_si128 (__m128i a, __m128i b)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,11 @@ internal Sse2() { }
/// PMULLW xmm, xmm/m128
/// </summary>
public static Vector128<short> MultiplyLow(Vector128<short> left, Vector128<short> right) => MultiplyLow(left, right);
/// <summary>
/// __m128i _mm_mullo_epi16 (__m128i a, __m128i b)
/// PMULLW xmm, xmm/m128
/// </summary>
// NOTE(review): the self-call is the JIT hardware-intrinsic placeholder pattern, not real recursion — the JIT substitutes the instruction above; confirm against the intrinsic tables.
public static Vector128<ushort> MultiplyLow(Vector128<ushort> left, Vector128<ushort> right) => MultiplyLow(left, right);

/// <summary>
/// __m128i _mm_or_si128 (__m128i a, __m128i b)
Expand Down
Loading