From b4dbad608e5522ba18d3f0bd4d1f754ba9a5414a Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 17 Jan 2024 13:22:20 -0800 Subject: [PATCH 1/7] Adding vectorized implementations of Exp to Vector64/128/256/512 --- .../System/Runtime/Intrinsics/Vector128.cs | 44 +++ .../System/Runtime/Intrinsics/Vector256.cs | 44 +++ .../System/Runtime/Intrinsics/Vector512.cs | 44 +++ .../src/System/Runtime/Intrinsics/Vector64.cs | 44 +++ .../System/Runtime/Intrinsics/VectorMath.cs | 322 +++++++++++++++++- .../ref/System.Runtime.Intrinsics.cs | 8 + .../tests/Vectors/Vector128Tests.cs | 16 + .../tests/Vectors/Vector256Tests.cs | 16 + .../tests/Vectors/Vector512Tests.cs | 16 + .../tests/Vectors/Vector64Tests.cs | 16 + .../tests/Vectors/VectorTestMemberData.cs | 80 +++++ 11 files changed, 642 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 0342f1b144aac..db3e30f6d96d5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -1426,6 +1426,50 @@ public static bool EqualsAny(Vector128 left, Vector128 right) || Vector64.EqualsAny(left._upper, right._upper); } + internal static Vector128 Exp(Vector128 vector) + where T : IExponentialFunctions + { + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < Vector128.Count; index++) + { + T value = T.Exp(vector.GetElement(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// Computes the Exp of each element in a vector. + /// The vector that will have its Exp computed. + /// A vector whose elements are the Exp of the elements in . 
+ public static Vector128 Exp(Vector128 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.ExpDouble, Vector128, Vector128>(vector); + } + else + { + return Exp(vector); + } + } + + /// Computes the Exp of each element in a vector. + /// The vector that will have its Exp computed. + /// A vector whose elements are the Exp of the elements in . + public static Vector128 Exp(Vector128 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.ExpSingle, Vector128, Vector128, Vector128>(vector); + } + else + { + return Exp(vector); + } + } + /// Extracts the most significant bit from each element in a vector. /// The type of the elements in the vector. /// The vector whose elements should have their most significant bit extracted. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 9a5422e238518..25de2fc09fc5e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1402,6 +1402,50 @@ public static bool EqualsAny(Vector256 left, Vector256 right) || Vector128.EqualsAny(left._upper, right._upper); } + internal static Vector256 Exp(Vector256 vector) + where T : IExponentialFunctions + { + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < Vector256.Count; index++) + { + T value = T.Exp(vector.GetElement(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// Computes the Exp of each element in a vector. + /// The vector that will have its Exp computed. + /// A vector whose elements are the Exp of the elements in . 
+ public static Vector256 Exp(Vector256 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.ExpDouble, Vector256, Vector256>(vector); + } + else + { + return Exp(vector); + } + } + + /// Computes the Exp of each element in a vector. + /// The vector that will have its Exp computed. + /// A vector whose elements are the Exp of the elements in . + public static Vector256 Exp(Vector256 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.ExpSingle, Vector256, Vector256, Vector256>(vector); + } + else + { + return Exp(vector); + } + } + /// Extracts the most significant bit from each element in a vector. /// The vector whose elements should have their most significant bit extracted. /// The type of the elements in the vector. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 42685879ebb05..c314043b00714 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -1453,6 +1453,50 @@ public static bool EqualsAny(Vector512 left, Vector512 right) || Vector256.EqualsAny(left._upper, right._upper); } + internal static Vector512 Exp(Vector512 vector) + where T : IExponentialFunctions + { + Unsafe.SkipInit(out Vector512 result); + + for (int index = 0; index < Vector512.Count; index++) + { + T value = T.Exp(vector.GetElement(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// Computes the Exp of each element in a vector. + /// The vector that will have its Exp computed. + /// A vector whose elements are the Exp of the elements in . 
+ public static Vector512 Exp(Vector512 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.ExpDouble, Vector512, Vector512>(vector); + } + else + { + return Exp(vector); + } + } + + /// Computes the Exp of each element in a vector. + /// The vector that will have its Exp computed. + /// A vector whose elements are the Exp of the elements in . + public static Vector512 Exp(Vector512 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.ExpSingle, Vector512, Vector512, Vector512>(vector); + } + else + { + return Exp(vector); + } + } + /// Extracts the most significant bit from each element in a vector. /// The vector whose elements should have their most significant bit extracted. /// The type of the elements in the vector. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index e3e4f8b3b40b0..7acaed2a15b0b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -1156,6 +1156,50 @@ public static bool EqualsAny(Vector64 left, Vector64 right) return false; } + internal static Vector64 Exp(Vector64 vector) + where T : IExponentialFunctions + { + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < Vector64.Count; index++) + { + T value = T.Exp(vector.GetElement(index)); + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// Computes the Exp of each element in a vector. + /// The vector that will have its Exp computed. + /// A vector whose elements are the Exp of the elements in . + public static Vector64 Exp(Vector64 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.ExpDouble, Vector64, Vector64>(vector); + } + else + { + return Exp(vector); + } + } + + /// Computes the Exp of each element in a vector. + /// The vector that will have its Exp computed. 
+ /// A vector whose elements are the Exp of the elements in . + public static Vector64 Exp(Vector64 vector) + { + if (IsHardwareAccelerated) + { + return VectorMath.ExpSingle, Vector64, Vector64, Vector64>(vector); + } + else + { + return Exp(vector); + } + } + /// Extracts the most significant bit from each element in a vector. /// The type of the elements in the vector. /// The vector whose elements should have their most significant bit extracted. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs index 20e5cf83bc47a..125c76d278a8b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/VectorMath.cs @@ -1,12 +1,248 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Diagnostics; using System.Runtime.CompilerServices; namespace System.Runtime.Intrinsics { internal static class VectorMath { + public static TVectorDouble ExpDouble(TVectorDouble x) + where TVectorDouble : unmanaged, ISimdVector + where TVectorUInt64 : unmanaged, ISimdVector + { + // This code is based on `vrd2_exp` from amd/aocl-libm-ose + // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. + // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Implementation Notes + // ---------------------- + // 1. 
Argument Reduction: + // e^x = 2^(x/ln2) = 2^(x*(64/ln(2))/64) --- (1) + // + // Choose 'n' and 'f', such that + // x * 64/ln2 = n + f --- (2) | n is integer + // | |f| <= 0.5 + // Choose 'm' and 'j' such that, + // n = (64 * m) + j --- (3) + // + // From (1), (2) and (3), + // e^x = 2^((64*m + j + f)/64) + // = (2^m) * (2^(j/64)) * 2^(f/64) + // = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64)) + // + // 2. Table Lookup (reference algorithm only; the code below uses no table and reduces by ln(2) directly) + // Values of (2^(j/64)) are precomputed, j = 0, 1, 2, 3 ... 63 + // + // 3. Polynomial Evaluation + // From (2), + // f = x*(64/ln(2)) - n + // Let, + // r = f*(ln(2)/64) = x - n*(ln(2)/64) + // + // 4. Reconstruction + // Thus, + // e^x = (2^m) * (2^(j/64)) * e^r + + const ulong V_ARG_MAX = 0x40862000_00000000; + const ulong V_DP64_BIAS = 1023; + + const double V_EXPF_MIN = -709.782712893384; + const double V_EXPF_MAX = +709.782712893384; + + const double V_EXPF_HUGE = 6755399441055744; + const double V_TBL_LN2 = 1.4426950408889634; + + const double V_LN2_HEAD = +0.693359375; + const double V_LN2_TAIL = -0.00021219444005469057; + + const double C3 = 0.5000000000000018; + const double C4 = 0.1666666666666617; + const double C5 = 0.04166666666649277; + const double C6 = 0.008333333333559272; + const double C7 = 0.001388888895122404; + const double C8 = 0.00019841269432677495; + const double C9 = 2.4801486521374483E-05; + const double C10 = 2.7557622532543023E-06; + const double C11 = 2.7632293298250954E-07; + const double C12 = 2.499430431958571E-08; + + // z = x * (1 / ln(2)), i.e. x * log2(e) + TVectorDouble z = x * TVectorDouble.Create(V_TBL_LN2); + + TVectorDouble dn = z + TVectorDouble.Create(V_EXPF_HUGE); + + // n = (int)z + TVectorUInt64 n = Unsafe.BitCast(dn); + + // dn = (double)n + dn -= TVectorDouble.Create(V_EXPF_HUGE); + + // r = x - (dn * ln(2)) + // where ln(2) is split into Head and Tail values + TVectorDouble r = x - (dn * TVectorDouble.Create(V_LN2_HEAD)) - (dn * TVectorDouble.Create(V_LN2_TAIL)); + + TVectorDouble r2 = r * r; + TVectorDouble r4 
= r2 * r2; + TVectorDouble r8 = r4 * r4; + + // Compute polynomial + TVectorDouble poly = ((TVectorDouble.Create(C12) * r + TVectorDouble.Create(C11)) * r2 + + TVectorDouble.Create(C10) * r + TVectorDouble.Create(C9)) * r8 + + ((TVectorDouble.Create(C8) * r + TVectorDouble.Create(C7)) * r2 + + (TVectorDouble.Create(C6) * r + TVectorDouble.Create(C5))) * r4 + + ((TVectorDouble.Create(C4) * r + TVectorDouble.Create(C3)) * r2 + (r + TVectorDouble.One)); + + // result = polynomial * 2^n + // where 2^n is formed by shifting the biased exponent (n + V_DP64_BIAS) into the exponent field + TVectorDouble ret = poly * Unsafe.BitCast((n + TVectorUInt64.Create(V_DP64_BIAS)) << 52); + + // Check if any |x| exceeds V_ARG_MAX (708), i.e. the result may overflow or underflow + if (TVectorUInt64.GreaterThanAny(Unsafe.BitCast(TVectorDouble.Abs(x)), TVectorUInt64.Create(V_ARG_MAX))) + { + // (x > V_EXPF_MAX) ? double.PositiveInfinity : ret + TVectorDouble infinityMask = TVectorDouble.GreaterThan(x, TVectorDouble.Create(V_EXPF_MAX)); + + ret = TVectorDouble.ConditionalSelect( + infinityMask, + TVectorDouble.Create(double.PositiveInfinity), + ret + ); + + // (x < V_EXPF_MIN) ? 0 : ret + ret = TVectorDouble.AndNot(ret, TVectorDouble.LessThan(x, TVectorDouble.Create(V_EXPF_MIN))); + } + + return ret; + } + + public static TVectorSingle ExpSingle(TVectorSingle x) + where TVectorSingle : unmanaged, ISimdVector + where TVectorUInt32 : unmanaged, ISimdVector + where TVectorDouble : unmanaged, ISimdVector + where TVectorUInt64 : unmanaged, ISimdVector + { + // This code is based on `vrs4_expf` from amd/aocl-libm-ose + // Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. + // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Implementation Notes: + // 1. Argument Reduction: + // e^x = 2^(x/ln2) --- (1) + // + // Let x/ln(2) = z --- (2) + // + // Let z = n + r , where n is an integer --- (3) + // |r| <= 1/2 + // + // From (1), (2) and (3), + // e^x = 2^z + // = 2^(n+r) + // = (2^n)*(2^r) --- (4) + // + // 2. 
Polynomial Evaluation + // From (4), + // r = z - n + // 2^r = C1 + C2*r + C3*r^2 + C4*r^3 + C5*r^4 + C6*r^5 + // + // 3. Reconstruction + // Thus, + // e^x = (2^n) * (2^r) + + const uint V_ARG_MAX = 0x42AE0000; + + const float V_EXPF_MIN = -103.97208f; + const float V_EXPF_MAX = +88.72284f; + + const double V_EXPF_HUGE = 6755399441055744; + const double V_TBL_LN2 = 1.4426950408889634; + + const double C1 = 1.0000000754895704; + const double C2 = 0.6931472254087585; + const double C3 = 0.2402210737432219; + const double C4 = 0.05550297297702539; + const double C5 = 0.009676036358193323; + const double C6 = 0.001341000536524434; + + // Convert x to double precision + (TVectorDouble xl, TVectorDouble xu) = Widen(x); + + // z = x * (1 / ln(2)), i.e. x * log2(e) + TVectorDouble v_tbl_ln2 = TVectorDouble.Create(V_TBL_LN2); + + TVectorDouble zl = xl * v_tbl_ln2; + TVectorDouble zu = xu * v_tbl_ln2; + + TVectorDouble v_expf_huge = TVectorDouble.Create(V_EXPF_HUGE); + + TVectorDouble dnl = zl + v_expf_huge; + TVectorDouble dnu = zu + v_expf_huge; + + // n = (int)z + TVectorUInt64 nl = Unsafe.BitCast(dnl); + TVectorUInt64 nu = Unsafe.BitCast(dnu); + + // dn = (double)n + dnl -= v_expf_huge; + dnu -= v_expf_huge; + + // r = z - dn + TVectorDouble c1 = TVectorDouble.Create(C1); + TVectorDouble c2 = TVectorDouble.Create(C2); + TVectorDouble c3 = TVectorDouble.Create(C3); + TVectorDouble c4 = TVectorDouble.Create(C4); + TVectorDouble c5 = TVectorDouble.Create(C5); + TVectorDouble c6 = TVectorDouble.Create(C6); + + TVectorDouble rl = zl - dnl; + + TVectorDouble rl2 = rl * rl; + TVectorDouble rl4 = rl2 * rl2; + + TVectorDouble polyl = (c4 * rl + c3) * rl2 + + ((c6 * rl + c5) * rl4 + + (c2 * rl + c1)); + + + TVectorDouble ru = zu - dnu; + + TVectorDouble ru2 = ru * ru; + TVectorDouble ru4 = ru2 * ru2; + + TVectorDouble polyu = (c4 * ru + c3) * ru2 + + ((c6 * ru + c5) * ru4 + + (c2 * ru + c1)); + + // result = (float)(poly + (n << 52)) + TVectorSingle ret = Narrow( + 
Unsafe.BitCast(Unsafe.BitCast(polyl) + (nl << 52)), + Unsafe.BitCast(Unsafe.BitCast(polyu) + (nu << 52)) + ); + + // Check if any |x| exceeds V_ARG_MAX (87), i.e. the result may overflow or underflow + if (TVectorUInt32.GreaterThanAny(Unsafe.BitCast(TVectorSingle.Abs(x)), TVectorUInt32.Create(V_ARG_MAX))) + { + // (x > V_EXPF_MAX) ? float.PositiveInfinity : ret + TVectorSingle infinityMask = TVectorSingle.GreaterThan(x, TVectorSingle.Create(V_EXPF_MAX)); + + ret = TVectorSingle.ConditionalSelect( + infinityMask, + TVectorSingle.Create(float.PositiveInfinity), + ret + ); + + // (x < V_EXPF_MIN) ? 0 : ret + ret = TVectorSingle.AndNot(ret, TVectorSingle.LessThan(x, TVectorSingle.Create(V_EXPF_MIN))); + } + + return ret; + } + public static TVectorDouble LogDouble(TVectorDouble x) where TVectorDouble : unmanaged, ISimdVector where TVectorInt64 : unmanaged, ISimdVector @@ -578,19 +814,19 @@ private static TVectorDouble ConvertToDouble(TVecto if (typeof(TVectorInt64) == typeof(Vector64)) { - return (TVectorDouble)(object)Vector64.ConvertToDouble((Vector64)(object)vector); + result = (TVectorDouble)(object)Vector64.ConvertToDouble((Vector64)(object)vector); } else if (typeof(TVectorInt64) == typeof(Vector128)) { - return (TVectorDouble)(object)Vector128.ConvertToDouble((Vector128)(object)vector); + result = (TVectorDouble)(object)Vector128.ConvertToDouble((Vector128)(object)vector); } else if (typeof(TVectorInt64) == typeof(Vector256)) { - return (TVectorDouble)(object)Vector256.ConvertToDouble((Vector256)(object)vector); + result = (TVectorDouble)(object)Vector256.ConvertToDouble((Vector256)(object)vector); } else if (typeof(TVectorInt64) == typeof(Vector512)) { - return (TVectorDouble)(object)Vector512.ConvertToDouble((Vector512)(object)vector); + result = (TVectorDouble)(object)Vector512.ConvertToDouble((Vector512)(object)vector); } else { @@ -609,19 +845,89 @@ private static TVectorSingle ConvertToSingle(TVecto if (typeof(TVectorInt32) == typeof(Vector64)) { - return 
(TVectorSingle)(object)Vector64.ConvertToSingle((Vector64)(object)vector); + result = (TVectorSingle)(object)Vector64.ConvertToSingle((Vector64)(object)vector); } else if (typeof(TVectorInt32) == typeof(Vector128)) { - return (TVectorSingle)(object)Vector128.ConvertToSingle((Vector128)(object)vector); + result = (TVectorSingle)(object)Vector128.ConvertToSingle((Vector128)(object)vector); } else if (typeof(TVectorInt32) == typeof(Vector256)) { - return (TVectorSingle)(object)Vector256.ConvertToSingle((Vector256)(object)vector); + result = (TVectorSingle)(object)Vector256.ConvertToSingle((Vector256)(object)vector); } else if (typeof(TVectorInt32) == typeof(Vector512)) { - return (TVectorSingle)(object)Vector512.ConvertToSingle((Vector512)(object)vector); + result = (TVectorSingle)(object)Vector512.ConvertToSingle((Vector512)(object)vector); + } + else + { + ThrowHelper.ThrowNotSupportedException(); + } + + return result; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static TVectorSingle Narrow(TVectorDouble lower, TVectorDouble upper) + where TVectorDouble : unmanaged, ISimdVector + where TVectorSingle : unmanaged, ISimdVector + { + Unsafe.SkipInit(out TVectorSingle result); + + if (typeof(TVectorDouble) == typeof(Vector64)) + { + Debug.Assert(typeof(TVectorSingle) == typeof(Vector64)); + result = (TVectorSingle)(object)Vector64.Narrow((Vector64)(object)lower, (Vector64)(object)upper); + } + else if (typeof(TVectorDouble) == typeof(Vector128)) + { + Debug.Assert(typeof(TVectorSingle) == typeof(Vector128)); + result = (TVectorSingle)(object)Vector128.Narrow((Vector128)(object)lower, (Vector128)(object)upper); + } + else if (typeof(TVectorDouble) == typeof(Vector256)) + { + Debug.Assert(typeof(TVectorSingle) == typeof(Vector256)); + result = (TVectorSingle)(object)Vector256.Narrow((Vector256)(object)lower, (Vector256)(object)upper); + } + else if (typeof(TVectorDouble) == typeof(Vector512)) + { + Debug.Assert(typeof(TVectorSingle) == 
typeof(Vector512)); + result = (TVectorSingle)(object)Vector512.Narrow((Vector512)(object)lower, (Vector512)(object)upper); + } + else + { + ThrowHelper.ThrowNotSupportedException(); + } + + return result; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static (TVectorDouble Lower, TVectorDouble Upper) Widen(TVectorSingle vector) + where TVectorSingle : unmanaged, ISimdVector + where TVectorDouble : unmanaged, ISimdVector + { + Unsafe.SkipInit(out (TVectorDouble, TVectorDouble) result); + + if (typeof(TVectorSingle) == typeof(Vector64)) + { + Debug.Assert(typeof(TVectorDouble) == typeof(Vector64)); + result = ((TVectorDouble, TVectorDouble))(object)Vector64.Widen((Vector64)(object)vector); + } + else if (typeof(TVectorSingle) == typeof(Vector128)) + { + Debug.Assert(typeof(TVectorDouble) == typeof(Vector128)); + result = ((TVectorDouble, TVectorDouble))(object)Vector128.Widen((Vector128)(object)vector); + } + else if (typeof(TVectorSingle) == typeof(Vector256)) + { + Debug.Assert(typeof(TVectorDouble) == typeof(Vector256)); + result = ((TVectorDouble, TVectorDouble))(object)Vector256.Widen((Vector256)(object)vector); + } + else if (typeof(TVectorSingle) == typeof(Vector512)) + { + Debug.Assert(typeof(TVectorDouble) == typeof(Vector512)); + result = ((TVectorDouble, TVectorDouble))(object)Vector512.Widen((Vector512)(object)vector); } else { diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 7f73e44ec3ca6..00318b5d15d38 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -155,6 +155,8 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector128 vector, public static bool EqualsAll(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static bool 
EqualsAny(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 Equals(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 Exp(System.Runtime.Intrinsics.Vector128 vector) { throw null; } + public static System.Runtime.Intrinsics.Vector128 Exp(System.Runtime.Intrinsics.Vector128 vector) { throw null; } [System.CLSCompliantAttribute(false)] public static uint ExtractMostSignificantBits(this System.Runtime.Intrinsics.Vector128 vector) { throw null; } public static System.Runtime.Intrinsics.Vector128 Floor(System.Runtime.Intrinsics.Vector128 vector) { throw null; } @@ -486,6 +488,8 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector256 vector, public static bool EqualsAll(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } public static bool EqualsAny(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } public static System.Runtime.Intrinsics.Vector256 Equals(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 Exp(System.Runtime.Intrinsics.Vector256 vector) { throw null; } + public static System.Runtime.Intrinsics.Vector256 Exp(System.Runtime.Intrinsics.Vector256 vector) { throw null; } [System.CLSCompliantAttribute(false)] public static uint ExtractMostSignificantBits(this System.Runtime.Intrinsics.Vector256 vector) { throw null; } public static System.Runtime.Intrinsics.Vector256 Floor(System.Runtime.Intrinsics.Vector256 vector) { throw null; } @@ -817,6 +821,8 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector512 vector, public static bool EqualsAll(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; 
} public static bool EqualsAny(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } public static System.Runtime.Intrinsics.Vector512 Equals(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Exp(System.Runtime.Intrinsics.Vector512 vector) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Exp(System.Runtime.Intrinsics.Vector512 vector) { throw null; } [System.CLSCompliantAttribute(false)] public static ulong ExtractMostSignificantBits(this System.Runtime.Intrinsics.Vector512 vector) { throw null; } public static System.Runtime.Intrinsics.Vector512 Floor(System.Runtime.Intrinsics.Vector512 vector) { throw null; } @@ -1122,6 +1128,8 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector64 vector, public static bool EqualsAll(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } public static bool EqualsAny(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } public static System.Runtime.Intrinsics.Vector64 Equals(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } + public static System.Runtime.Intrinsics.Vector64 Exp(System.Runtime.Intrinsics.Vector64 vector) { throw null; } + public static System.Runtime.Intrinsics.Vector64 Exp(System.Runtime.Intrinsics.Vector64 vector) { throw null; } [System.CLSCompliantAttribute(false)] public static uint ExtractMostSignificantBits(this System.Runtime.Intrinsics.Vector64 vector) { throw null; } public static System.Runtime.Intrinsics.Vector64 Floor(System.Runtime.Intrinsics.Vector64 vector) { throw null; } diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs index 13ffa0292bff3..a81e6a51b81e0 100644 --- 
a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs @@ -4687,6 +4687,22 @@ private static void TestGetOne() Assert.Equal((Vector128)methodInfo.Invoke(null, null), Vector128.Create(T.One)); } + [Theory] + [MemberData(nameof(VectorTestMemberData.ExpDouble), MemberType = typeof(VectorTestMemberData))] + public void ExpDoubleTest(double value, double expectedResult, double variance) + { + Vector128 actualResult = Vector128.Exp(Vector128.Create(value)); + AssertEqual(Vector128.Create(expectedResult), actualResult, Vector128.Create(variance)); + } + + [Theory] + [MemberData(nameof(VectorTestMemberData.ExpSingle), MemberType = typeof(VectorTestMemberData))] + public void ExpSingleTest(float value, float expectedResult, float variance) + { + Vector128 actualResult = Vector128.Exp(Vector128.Create(value)); + AssertEqual(Vector128.Create(expectedResult), actualResult, Vector128.Create(variance)); + } + [Theory] [MemberData(nameof(VectorTestMemberData.LogDouble), MemberType = typeof(VectorTestMemberData))] public void LogDoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs index a74bf79d65dea..70e6eb2cee76b 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs @@ -5702,6 +5702,22 @@ private static void TestGetOne() Assert.Equal((Vector256)methodInfo.Invoke(null, null), Vector256.Create(T.One)); } + [Theory] + [MemberData(nameof(VectorTestMemberData.ExpDouble), MemberType = typeof(VectorTestMemberData))] + public void ExpDoubleTest(double value, double expectedResult, double variance) + { + Vector256 actualResult = Vector256.Exp(Vector256.Create(value)); + AssertEqual(Vector256.Create(expectedResult), 
actualResult, Vector256.Create(variance)); + } + + [Theory] + [MemberData(nameof(VectorTestMemberData.ExpSingle), MemberType = typeof(VectorTestMemberData))] + public void ExpSingleTest(float value, float expectedResult, float variance) + { + Vector256 actualResult = Vector256.Exp(Vector256.Create(value)); + AssertEqual(Vector256.Create(expectedResult), actualResult, Vector256.Create(variance)); + } + [Theory] [MemberData(nameof(VectorTestMemberData.LogDouble), MemberType = typeof(VectorTestMemberData))] public void LogDoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs index 01c42d277b855..580d67bb84315 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs @@ -5134,6 +5134,22 @@ private static void TestIsNotSupported() Assert.False((bool)methodInfo.Invoke(null, null)); } + [Theory] + [MemberData(nameof(VectorTestMemberData.ExpDouble), MemberType = typeof(VectorTestMemberData))] + public void ExpDoubleTest(double value, double expectedResult, double variance) + { + Vector512 actualResult = Vector512.Exp(Vector512.Create(value)); + AssertEqual(Vector512.Create(expectedResult), actualResult, Vector512.Create(variance)); + } + + [Theory] + [MemberData(nameof(VectorTestMemberData.ExpSingle), MemberType = typeof(VectorTestMemberData))] + public void ExpSingleTest(float value, float expectedResult, float variance) + { + Vector512 actualResult = Vector512.Exp(Vector512.Create(value)); + AssertEqual(Vector512.Create(expectedResult), actualResult, Vector512.Create(variance)); + } + [Theory] [MemberData(nameof(VectorTestMemberData.LogDouble), MemberType = typeof(VectorTestMemberData))] public void LogDoubleTest(double value, double expectedResult, double variance) diff --git 
a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs index ec5a0e11118ce..0f5a03510daf8 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs @@ -4104,6 +4104,22 @@ private static void TestGetOne() Assert.Equal((Vector64)methodInfo.Invoke(null, null), Vector64.Create(T.One)); } + [Theory] + [MemberData(nameof(VectorTestMemberData.ExpDouble), MemberType = typeof(VectorTestMemberData))] + public void ExpDoubleTest(double value, double expectedResult, double variance) + { + Vector64 actualResult = Vector64.Exp(Vector64.Create(value)); + AssertEqual(Vector64.Create(expectedResult), actualResult, Vector64.Create(variance)); + } + + [Theory] + [MemberData(nameof(VectorTestMemberData.ExpSingle), MemberType = typeof(VectorTestMemberData))] + public void ExpSingleTest(float value, float expectedResult, float variance) + { + Vector64 actualResult = Vector64.Exp(Vector64.Create(value)); + AssertEqual(Vector64.Create(expectedResult), actualResult, Vector64.Create(variance)); + } + [Theory] [MemberData(nameof(VectorTestMemberData.LogDouble), MemberType = typeof(VectorTestMemberData))] public void LogDoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs index cd2a20baa374d..7fdb5189ced4f 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs @@ -35,6 +35,86 @@ internal static class VectorTestMemberData // use CrossPlatformMachineEpsilon * 10. 
private const float SingleCrossPlatformMachineEpsilon = 4.76837158e-07f; + public static IEnumerable ExpDouble + { + get + { + yield return new object[] { double.NegativeInfinity, 0.0, 0.0 }; + yield return new object[] { -3.1415926535897932, 0.043213918263772250, DoubleCrossPlatformMachineEpsilon / 10 }; // value: -(pi) + yield return new object[] { -2.7182818284590452, 0.065988035845312537, DoubleCrossPlatformMachineEpsilon / 10 }; // value: -(e) + yield return new object[] { -2.3025850929940457, 0.1, DoubleCrossPlatformMachineEpsilon }; // value: -(ln(10)) + yield return new object[] { -1.5707963267948966, 0.20787957635076191, DoubleCrossPlatformMachineEpsilon }; // value: -(pi / 2) + yield return new object[] { -1.4426950408889634, 0.23629008834452270, DoubleCrossPlatformMachineEpsilon }; // value: -(log2(e)) + yield return new object[] { -1.4142135623730950, 0.24311673443421421, DoubleCrossPlatformMachineEpsilon }; // value: -(sqrt(2)) + yield return new object[] { -1.1283791670955126, 0.32355726390307110, DoubleCrossPlatformMachineEpsilon }; // value: -(2 / sqrt(pi)) + yield return new object[] { -1.0, 0.36787944117144232, DoubleCrossPlatformMachineEpsilon }; + yield return new object[] { -0.78539816339744831, 0.45593812776599624, DoubleCrossPlatformMachineEpsilon }; // value: -(pi / 4) + yield return new object[] { -0.70710678118654752, 0.49306869139523979, DoubleCrossPlatformMachineEpsilon }; // value: -(1 / sqrt(2)) + yield return new object[] { -0.69314718055994531, 0.5, 0.0 }; // value: -(ln(2)) + yield return new object[] { -0.63661977236758134, 0.52907780826773535, DoubleCrossPlatformMachineEpsilon }; // value: -(2 / pi) + yield return new object[] { -0.43429448190325183, 0.64772148514180065, DoubleCrossPlatformMachineEpsilon }; // value: -(log10(e)) + yield return new object[] { -0.31830988618379067, 0.72737734929521647, DoubleCrossPlatformMachineEpsilon }; // value: -(1 / pi) + yield return new object[] { -0.0, 1.0, 0.0 }; + yield return new object[] 
{ double.NaN, double.NaN, 0.0 }; + yield return new object[] { 0.0, 1.0, 0.0 }; + yield return new object[] { 0.31830988618379067, 1.3748022274393586, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (1 / pi) + yield return new object[] { 0.43429448190325183, 1.5438734439711811, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (log10(e)) + yield return new object[] { 0.63661977236758134, 1.8900811645722220, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (2 / pi) + yield return new object[] { 0.69314718055994531, 2.0, 0.0 }; // value: (ln(2)) + yield return new object[] { 0.70710678118654752, 2.0281149816474725, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (1 / sqrt(2)) + yield return new object[] { 0.78539816339744831, 2.1932800507380155, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (pi / 4) + yield return new object[] { 1.0, 2.7182818284590452, DoubleCrossPlatformMachineEpsilon * 10 }; // expected: (e) + yield return new object[] { 1.1283791670955126, 3.0906430223107976, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (2 / sqrt(pi)) + yield return new object[] { 1.4142135623730950, 4.1132503787829275, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (sqrt(2)) + yield return new object[] { 1.4426950408889634, 4.2320861065570819, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (log2(e)) + yield return new object[] { 1.5707963267948966, 4.8104773809653517, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (pi / 2) + yield return new object[] { 2.3025850929940457, 10.0, 0.0 }; // value: (ln(10)) + yield return new object[] { 2.7182818284590452, 15.154262241479264, DoubleCrossPlatformMachineEpsilon * 100 }; // value: (e) + yield return new object[] { 3.1415926535897932, 23.140692632779269, DoubleCrossPlatformMachineEpsilon * 100 }; // value: (pi) + yield return new object[] { double.PositiveInfinity, double.PositiveInfinity, 0.0 }; + } + } + + public static IEnumerable ExpSingle + { + get + { + yield return new object[] { 
float.NegativeInfinity, 0.0f, 0.0f }; + yield return new object[] { -3.14159265f, 0.0432139183f, SingleCrossPlatformMachineEpsilon / 10 }; // value: -(pi) + yield return new object[] { -2.71828183f, 0.0659880358f, SingleCrossPlatformMachineEpsilon / 10 }; // value: -(e) + yield return new object[] { -2.30258509f, 0.1f, SingleCrossPlatformMachineEpsilon }; // value: -(ln(10)) + yield return new object[] { -1.57079633f, 0.207879576f, SingleCrossPlatformMachineEpsilon }; // value: -(pi / 2) + yield return new object[] { -1.44269504f, 0.236290088f, SingleCrossPlatformMachineEpsilon }; // value: -(log2(e)) + yield return new object[] { -1.41421356f, 0.243116734f, SingleCrossPlatformMachineEpsilon }; // value: -(sqrt(2)) + yield return new object[] { -1.12837917f, 0.323557264f, SingleCrossPlatformMachineEpsilon }; // value: -(2 / sqrt(pi)) + yield return new object[] { -1.0f, 0.367879441f, SingleCrossPlatformMachineEpsilon }; + yield return new object[] { -0.785398163f, 0.455938128f, SingleCrossPlatformMachineEpsilon }; // value: -(pi / 4) + yield return new object[] { -0.707106781f, 0.493068691f, SingleCrossPlatformMachineEpsilon }; // value: -(1 / sqrt(2)) + yield return new object[] { -0.693147181f, 0.5f, 0.0f }; // value: -(ln(2)) + yield return new object[] { -0.636619772f, 0.529077808f, SingleCrossPlatformMachineEpsilon }; // value: -(2 / pi) + yield return new object[] { -0.434294482f, 0.647721485f, SingleCrossPlatformMachineEpsilon }; // value: -(log10(e)) + yield return new object[] { -0.318309886f, 0.727377349f, SingleCrossPlatformMachineEpsilon }; // value: -(1 / pi) + yield return new object[] { -0.0f, 1.0f, 0.0f }; + yield return new object[] { float.NaN, float.NaN, 0.0f }; + yield return new object[] { 0.0f, 1.0f, 0.0f }; + yield return new object[] { 0.318309886f, 1.37480223f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (1 / pi) + yield return new object[] { 0.434294482f, 1.54387344f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (log10(e)) + 
yield return new object[] { 0.636619772f, 1.89008116f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (2 / pi) + yield return new object[] { 0.693147181f, 2.0f, 0.0f }; // value: (ln(2)) + yield return new object[] { 0.707106781f, 2.02811498f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (1 / sqrt(2)) + yield return new object[] { 0.785398163f, 2.19328005f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (pi / 4) + yield return new object[] { 1.0f, 2.71828183f, SingleCrossPlatformMachineEpsilon * 10 }; // expected: (e) + yield return new object[] { 1.12837917f, 3.09064302f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (2 / sqrt(pi)) + yield return new object[] { 1.41421356f, 4.11325038f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (sqrt(2)) + yield return new object[] { 1.44269504f, 4.23208611f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (log2(e)) + yield return new object[] { 1.57079633f, 4.81047738f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (pi / 2) + yield return new object[] { 2.30258509f, 10.0f, 0.0f }; // value: (ln(10)) + yield return new object[] { 2.71828183f, 15.1542622f, SingleCrossPlatformMachineEpsilon * 100 }; // value: (e) + yield return new object[] { 3.14159265f, 23.1406926f, SingleCrossPlatformMachineEpsilon * 100 }; // value: (pi) + yield return new object[] { float.PositiveInfinity, float.PositiveInfinity, 0.0f }; + } + } + public static IEnumerable LogDouble { get From 7736a0db97bc778c8b530f9faec5d2bcaa1b78e7 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 17 Jan 2024 13:32:09 -0800 Subject: [PATCH 2/7] Accelerate TensorPrimitives.Exp for double --- .../netcore/TensorPrimitives.netcore.cs | 375 ++++++++++++++++-- 1 file changed, 335 insertions(+), 40 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 
9c41234cef3f9..41560fbb8b284 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -10999,6 +10999,310 @@ public static Vector512 Invoke(Vector512 x) /// T.Exp(x) internal readonly struct ExpOperator : IUnaryOperator where T : IExponentialFunctions + { + public static bool Vectorizable => (typeof(T) == typeof(double)) + || (typeof(T) == typeof(float)); + + public static T Invoke(T x) => T.Exp(x); + + public static Vector128 Invoke(Vector128 x) + { +#if NET9_0_OR_GREATER + if (typeof(T) == typeof(double)) + { + return Vector128.Exp(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return Vector128.Exp(x.AsSingle()).As(); + } +#else + if (typeof(T) == typeof(double)) + { + return ExpOperatorDouble.Invoke(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return ExpOperatorSingle.Invoke(x.AsSingle()).As(); + } +#endif + } + + public static Vector256 Invoke(Vector256 x) + { +#if NET9_0_OR_GREATER + if (typeof(T) == typeof(double)) + { + return Vector256.Exp(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return Vector256.Exp(x.AsSingle()).As(); + } +#else + if (typeof(T) == typeof(double)) + { + return ExpOperatorDouble.Invoke(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return ExpOperatorSingle.Invoke(x.AsSingle()).As(); + } +#endif + } + + public static Vector512 Invoke(Vector512 x) + { +#if NET9_0_OR_GREATER + if (typeof(T) == typeof(double)) + { + return Vector512.Exp(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return Vector512.Exp(x.AsSingle()).As(); + } +#else + if (typeof(T) == typeof(double)) + { + return ExpOperatorDouble.Invoke(x.AsDouble()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(float)); + return 
ExpOperatorSingle.Invoke(x.AsSingle()).As(); + } +#endif + } + } + +#if !NET9_0_OR_GREATER + /// double.Exp(x) + internal readonly struct ExpOperatorDouble : IUnaryOperator + { + // This code is based on `vrd2_exp` from amd/aocl-libm-ose + // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. + // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Implementation Notes + // ---------------------- + // 1. Argument Reduction: + // e^x = 2^(x/ln2) = 2^(x*(64/ln(2))/64) --- (1) + // + // Choose 'n' and 'f', such that + // x * 64/ln2 = n + f --- (2) | n is integer + // | |f| <= 0.5 + // Choose 'm' and 'j' such that, + // n = (64 * m) + j --- (3) + // + // From (1), (2) and (3), + // e^x = 2^((64*m + j + f)/64) + // = (2^m) * (2^(j/64)) * 2^(f/64) + // = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64)) + // + // 2. Table Lookup + // Values of (2^(j/64)) are precomputed, j = 0, 1, 2, 3 ... 63 + // + // 3. Polynomial Evaluation + // From (2), + // f = x*(64/ln(2)) - n + // Let, + // r = f*(ln(2)/64) = x - n*(ln(2)/64) + // + // 4. 
Reconstruction + // Thus, + // e^x = (2^m) * (2^(j/64)) * e^r + + private const ulong V_ARG_MAX = 0x40862000_00000000; + private const ulong V_DP64_BIAS = 1023; + + private const double V_EXPF_MIN = -709.782712893384; + private const double V_EXPF_MAX = +709.782712893384; + + private const double V_EXPF_HUGE = 6755399441055744; + private const double V_TBL_LN2 = 1.4426950408889634; + + private const double V_LN2_HEAD = +0.693359375; + private const double V_LN2_TAIL = -0.00021219444005469057; + + private const double C3 = 0.5000000000000018; + private const double C4 = 0.1666666666666617; + private const double C5 = 0.04166666666649277; + private const double C6 = 0.008333333333559272; + private const double C7 = 0.001388888895122404; + private const double C8 = 0.00019841269432677495; + private const double C9 = 2.4801486521374483E-05; + private const double C10 = 2.7557622532543023E-06; + private const double C11 = 2.7632293298250954E-07; + private const double C12 = 2.499430431958571E-08; + + public static bool Vectorizable => true; + + public static double Invoke(double x) => double.Exp(x); + + public static Vector128 Invoke(Vector128 x) + { + // x * (64.0 / ln(2)) + Vector128 z = x * Vector128.Create(V_TBL_LN2); + + Vector128 dn = z + Vector128.Create(V_EXPF_HUGE); + + // n = (int)z + Vector128 n = dn.AsUInt64(); + + // dn = (double)n + dn -= Vector128.Create(V_EXPF_HUGE); + + // r = x - (dn * (ln(2) / 64)) + // where ln(2) / 64 is split into Head and Tail values + Vector128 r = x - (dn * Vector128.Create(V_LN2_HEAD)) - (dn * Vector128.Create(V_LN2_TAIL)); + + Vector128 r2 = r * r; + Vector128 r4 = r2 * r2; + Vector128 r8 = r4 * r4; + + // Compute polynomial + Vector128 poly = ((Vector128.Create(C12) * r + Vector128.Create(C11)) * r2 + + Vector128.Create(C10) * r + Vector128.Create(C9)) * r8 + + ((Vector128.Create(C8) * r + Vector128.Create(C7)) * r2 + + (Vector128.Create(C6) * r + Vector128.Create(C5))) * r4 + + ((Vector128.Create(C4) * r + 
Vector128.Create(C3)) * r2 + (r + Vector128.One)); + + // m = (n - j) / 64 + // result = polynomial * 2^m + Vector128 ret = poly * ((n + Vector128.Create(V_DP64_BIAS)) << 52).AsDouble(); + + // Check if -709 < vx < 709 + if (Vector128.GreaterThanAny(Vector128.Abs(x).AsUInt64(), Vector128.Create(V_ARG_MAX))) + { + // (x > V_EXPF_MAX) ? double.PositiveInfinity : x + Vector128 infinityMask = Vector128.GreaterThan(x, Vector128.Create(V_EXPF_MAX)); + + ret = Vector128.ConditionalSelect( + infinityMask, + Vector128.Create(double.PositiveInfinity), + ret + ); + + // (x < V_EXPF_MIN) ? 0 : x + ret = Vector128.AndNot(ret, Vector128.LessThan(x, Vector128.Create(V_EXPF_MIN))); + } + + return ret; + } + + public static Vector256 Invoke(Vector256 x) + { + // x * (64.0 / ln(2)) + Vector256 z = x * Vector256.Create(V_TBL_LN2); + + Vector256 dn = z + Vector256.Create(V_EXPF_HUGE); + + // n = (int)z + Vector256 n = dn.AsUInt64(); + + // dn = (double)n + dn -= Vector256.Create(V_EXPF_HUGE); + + // r = x - (dn * (ln(2) / 64)) + // where ln(2) / 64 is split into Head and Tail values + Vector256 r = x - (dn * Vector256.Create(V_LN2_HEAD)) - (dn * Vector256.Create(V_LN2_TAIL)); + + Vector256 r2 = r * r; + Vector256 r4 = r2 * r2; + Vector256 r8 = r4 * r4; + + // Compute polynomial + Vector256 poly = ((Vector256.Create(C12) * r + Vector256.Create(C11)) * r2 + + Vector256.Create(C10) * r + Vector256.Create(C9)) * r8 + + ((Vector256.Create(C8) * r + Vector256.Create(C7)) * r2 + + (Vector256.Create(C6) * r + Vector256.Create(C5))) * r4 + + ((Vector256.Create(C4) * r + Vector256.Create(C3)) * r2 + (r + Vector256.One)); + + // m = (n - j) / 64 + // result = polynomial * 2^m + Vector256 ret = poly * ((n + Vector256.Create(V_DP64_BIAS)) << 52).AsDouble(); + + // Check if -709 < vx < 709 + if (Vector256.GreaterThanAny(Vector256.Abs(x).AsUInt64(), Vector256.Create(V_ARG_MAX))) + { + // (x > V_EXPF_MAX) ? 
double.PositiveInfinity : x + Vector256 infinityMask = Vector256.GreaterThan(x, Vector256.Create(V_EXPF_MAX)); + + ret = Vector256.ConditionalSelect( + infinityMask, + Vector256.Create(double.PositiveInfinity), + ret + ); + + // (x < V_EXPF_MIN) ? 0 : x + ret = Vector256.AndNot(ret, Vector256.LessThan(x, Vector256.Create(V_EXPF_MIN))); + } + + return ret; + } + + public static Vector512 Invoke(Vector512 x) + { + // x * (64.0 / ln(2)) + Vector512 z = x * Vector512.Create(V_TBL_LN2); + + Vector512 dn = z + Vector512.Create(V_EXPF_HUGE); + + // n = (int)z + Vector512 n = dn.AsUInt64(); + + // dn = (double)n + dn -= Vector512.Create(V_EXPF_HUGE); + + // r = x - (dn * (ln(2) / 64)) + // where ln(2) / 64 is split into Head and Tail values + Vector512 r = x - (dn * Vector512.Create(V_LN2_HEAD)) - (dn * Vector512.Create(V_LN2_TAIL)); + + Vector512 r2 = r * r; + Vector512 r4 = r2 * r2; + Vector512 r8 = r4 * r4; + + // Compute polynomial + Vector512 poly = ((Vector512.Create(C12) * r + Vector512.Create(C11)) * r2 + + Vector512.Create(C10) * r + Vector512.Create(C9)) * r8 + + ((Vector512.Create(C8) * r + Vector512.Create(C7)) * r2 + + (Vector512.Create(C6) * r + Vector512.Create(C5))) * r4 + + ((Vector512.Create(C4) * r + Vector512.Create(C3)) * r2 + (r + Vector512.One)); + + // m = (n - j) / 64 + // result = polynomial * 2^m + Vector512 ret = poly * ((n + Vector512.Create(V_DP64_BIAS)) << 52).AsDouble(); + + // Check if -709 < vx < 709 + if (Vector512.GreaterThanAny(Vector512.Abs(x).AsUInt64(), Vector512.Create(V_ARG_MAX))) + { + // (x > V_EXPF_MAX) ? double.PositiveInfinity : x + Vector512 infinityMask = Vector512.GreaterThan(x, Vector512.Create(V_EXPF_MAX)); + + ret = Vector512.ConditionalSelect( + infinityMask, + Vector512.Create(double.PositiveInfinity), + ret + ); + + // (x < V_EXPF_MIN) ? 
0 : x + ret = Vector512.AndNot(ret, Vector512.LessThan(x, Vector512.Create(V_EXPF_MIN))); + } + + return ret; + } + } + + /// float.Exp(x) + internal readonly struct ExpOperatorSingle : IUnaryOperator { // This code is based on `vrs4_expf` from amd/aocl-libm-ose // Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. @@ -11030,10 +11334,9 @@ public static Vector512 Invoke(Vector512 x) // e^x = (2^N) * (2^r) private const uint V_ARG_MAX = 0x42AE0000; - private const uint V_MASK = 0x7FFFFFFF; private const float V_EXPF_MIN = -103.97208f; - private const float V_EXPF_MAX = 88.72284f; + private const float V_EXPF_MAX = +88.72284f; private const double V_EXPF_HUGE = 6755399441055744; private const double V_TBL_LN2 = 1.4426950408889634; @@ -11045,15 +11348,12 @@ public static Vector512 Invoke(Vector512 x) private const double C5 = 0.009676036358193323; private const double C6 = 0.001341000536524434; - public static bool Vectorizable => typeof(T) == typeof(float); + public static bool Vectorizable => true; - public static T Invoke(T x) => T.Exp(x); + public static float Invoke(float x) => float.Exp(x); - public static Vector128 Invoke(Vector128 t) + public static Vector128 Invoke(Vector128 x) { - Debug.Assert(typeof(T) == typeof(float)); - Vector128 x = t.AsSingle(); - // Convert x to double precision (Vector128 xl, Vector128 xu) = Vector128.Widen(x); @@ -11068,11 +11368,11 @@ public static Vector128 Invoke(Vector128 t) Vector128 dnl = zl + v_expf_huge; Vector128 dnu = zu + v_expf_huge; - // n = int (z) + // n = (int)z Vector128 nl = dnl.AsUInt64(); Vector128 nu = dnu.AsUInt64(); - // dn = double(n) + // dn = (double)n dnl -= v_expf_huge; dnu -= v_expf_huge; @@ -11103,14 +11403,14 @@ public static Vector128 Invoke(Vector128 t) + ((c6 * ru + c5) * ru4 + (c2 * ru + c1)); - // result = (float)[poly + (n << 52)] + // result = (float)(poly + (n << 52)) Vector128 ret = Vector128.Narrow( - (polyl.AsUInt64() + Vector128.ShiftLeft(nl, 52)).AsDouble(), - 
(polyu.AsUInt64() + Vector128.ShiftLeft(nu, 52)).AsDouble() + (polyl.AsUInt64() + (nl << 52)).AsDouble(), + (polyu.AsUInt64() + (nu << 52)).AsDouble() ); // Check if -103 < |x| < 88 - if (Vector128.GreaterThanAny(x.AsUInt32() & Vector128.Create(V_MASK), Vector128.Create(V_ARG_MAX))) + if (Vector128.GreaterThanAny(Vector128.Abs(x).AsUInt32(), Vector128.Create(V_ARG_MAX))) { // (x > V_EXPF_MAX) ? float.PositiveInfinity : x Vector128 infinityMask = Vector128.GreaterThan(x, Vector128.Create(V_EXPF_MAX)); @@ -11125,14 +11425,11 @@ public static Vector128 Invoke(Vector128 t) ret = Vector128.AndNot(ret, Vector128.LessThan(x, Vector128.Create(V_EXPF_MIN))); } - return ret.As(); + return ret; } - public static Vector256 Invoke(Vector256 t) + public static Vector256 Invoke(Vector256 x) { - Debug.Assert(typeof(T) == typeof(float)); - Vector256 x = t.AsSingle(); - // Convert x to double precision (Vector256 xl, Vector256 xu) = Vector256.Widen(x); @@ -11147,11 +11444,11 @@ public static Vector256 Invoke(Vector256 t) Vector256 dnl = zl + v_expf_huge; Vector256 dnu = zu + v_expf_huge; - // n = int (z) + // n = (int)z Vector256 nl = dnl.AsUInt64(); Vector256 nu = dnu.AsUInt64(); - // dn = double(n) + // dn = (double)n dnl -= v_expf_huge; dnu -= v_expf_huge; @@ -11182,14 +11479,14 @@ public static Vector256 Invoke(Vector256 t) + ((c6 * ru + c5) * ru4 + (c2 * ru + c1)); - // result = (float)[poly + (n << 52)] + // result = (float)(poly + (n << 52)) Vector256 ret = Vector256.Narrow( - (polyl.AsUInt64() + Vector256.ShiftLeft(nl, 52)).AsDouble(), - (polyu.AsUInt64() + Vector256.ShiftLeft(nu, 52)).AsDouble() + (polyl.AsUInt64() + (nl << 52)).AsDouble(), + (polyu.AsUInt64() + (nu << 52)).AsDouble() ); // Check if -103 < |x| < 88 - if (Vector256.GreaterThanAny(x.AsUInt32() & Vector256.Create(V_MASK), Vector256.Create(V_ARG_MAX))) + if (Vector256.GreaterThanAny(Vector256.Abs(x).AsUInt32(), Vector256.Create(V_ARG_MAX))) { // (x > V_EXPF_MAX) ? 
float.PositiveInfinity : x Vector256 infinityMask = Vector256.GreaterThan(x, Vector256.Create(V_EXPF_MAX)); @@ -11204,14 +11501,11 @@ public static Vector256 Invoke(Vector256 t) ret = Vector256.AndNot(ret, Vector256.LessThan(x, Vector256.Create(V_EXPF_MIN))); } - return ret.As(); + return ret; } - public static Vector512 Invoke(Vector512 t) + public static Vector512 Invoke(Vector512 x) { - Debug.Assert(typeof(T) == typeof(float)); - Vector512 x = t.AsSingle(); - // Convert x to double precision (Vector512 xl, Vector512 xu) = Vector512.Widen(x); @@ -11226,11 +11520,11 @@ public static Vector512 Invoke(Vector512 t) Vector512 dnl = zl + v_expf_huge; Vector512 dnu = zu + v_expf_huge; - // n = int (z) + // n = (int)z Vector512 nl = dnl.AsUInt64(); Vector512 nu = dnu.AsUInt64(); - // dn = double(n) + // dn = (double)n dnl -= v_expf_huge; dnu -= v_expf_huge; @@ -11261,14 +11555,14 @@ public static Vector512 Invoke(Vector512 t) + ((c6 * ru + c5) * ru4 + (c2 * ru + c1)); - // result = (float)[poly + (n << 52)] + // result = (float)(poly + (n << 52)) Vector512 ret = Vector512.Narrow( - (polyl.AsUInt64() + Vector512.ShiftLeft(nl, 52)).AsDouble(), - (polyu.AsUInt64() + Vector512.ShiftLeft(nu, 52)).AsDouble() + (polyl.AsUInt64() + (nl << 52)).AsDouble(), + (polyu.AsUInt64() + (nu << 52)).AsDouble() ); // Check if -103 < |x| < 88 - if (Vector512.GreaterThanAny(x.AsUInt32() & Vector512.Create(V_MASK), Vector512.Create(V_ARG_MAX))) + if (Vector512.GreaterThanAny(Vector512.Abs(x).AsUInt32(), Vector512.Create(V_ARG_MAX))) { // (x > V_EXPF_MAX) ? 
float.PositiveInfinity : x Vector512 infinityMask = Vector512.GreaterThan(x, Vector512.Create(V_EXPF_MAX)); @@ -11283,9 +11577,10 @@ public static Vector512 Invoke(Vector512 t) ret = Vector512.AndNot(ret, Vector512.LessThan(x, Vector512.Create(V_EXPF_MIN))); } - return ret.As(); + return ret; } } +#endif /// T.Cosh(x) internal readonly struct CoshOperator : IUnaryOperator @@ -12336,7 +12631,7 @@ public static Vector128 Invoke(Vector128 x) x ); - specialMask = Unsafe.BitCast, Vector128>(temp); + specialMask = temp.AsUInt64(); } // Reduce the mantissa to [+2/3, +4/3] @@ -12417,7 +12712,7 @@ public static Vector256 Invoke(Vector256 x) x ); - specialMask = Unsafe.BitCast, Vector256>(temp); + specialMask = temp.AsUInt64(); } // Reduce the mantissa to [+2/3, +4/3] @@ -12498,7 +12793,7 @@ public static Vector512 Invoke(Vector512 x) x ); - specialMask = Unsafe.BitCast, Vector512>(temp); + specialMask = temp.AsUInt64(); } // Reduce the mantissa to [+2/3, +4/3] From bb8dbd91d8a5d59ecbae8d5676e648567fe38d27 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 17 Jan 2024 21:03:39 -0800 Subject: [PATCH 3/7] Ensure the right allowedVariance is used for the vectorized exp tests --- .../tests/Vectors/VectorTestMemberData.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs index 7fdb5189ced4f..cc732b6b5978d 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs @@ -68,7 +68,7 @@ public static IEnumerable ExpDouble yield return new object[] { 1.4142135623730950, 4.1132503787829275, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (sqrt(2)) yield return new object[] { 1.4426950408889634, 4.2320861065570819, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (log2(e)) yield return 
new object[] { 1.5707963267948966, 4.8104773809653517, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (pi / 2) - yield return new object[] { 2.3025850929940457, 10.0, 0.0 }; // value: (ln(10)) + yield return new object[] { 2.3025850929940457, 10.0, DoubleCrossPlatformMachineEpsilon * 10 }; // value: (ln(10)) yield return new object[] { 2.7182818284590452, 15.154262241479264, DoubleCrossPlatformMachineEpsilon * 100 }; // value: (e) yield return new object[] { 3.1415926535897932, 23.140692632779269, DoubleCrossPlatformMachineEpsilon * 100 }; // value: (pi) yield return new object[] { double.PositiveInfinity, double.PositiveInfinity, 0.0 }; @@ -108,7 +108,7 @@ public static IEnumerable ExpSingle yield return new object[] { 1.41421356f, 4.11325038f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (sqrt(2)) yield return new object[] { 1.44269504f, 4.23208611f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (log2(e)) yield return new object[] { 1.57079633f, 4.81047738f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (pi / 2) - yield return new object[] { 2.30258509f, 10.0f, 0.0f }; // value: (ln(10)) + yield return new object[] { 2.30258509f, 10.0f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (ln(10)) yield return new object[] { 2.71828183f, 15.1542622f, SingleCrossPlatformMachineEpsilon * 100 }; // value: (e) yield return new object[] { 3.14159265f, 23.1406926f, SingleCrossPlatformMachineEpsilon * 100 }; // value: (pi) yield return new object[] { float.PositiveInfinity, float.PositiveInfinity, 0.0f }; From c81f0d7948e765268eac1d506065c1eca1ecb11e Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 18 Jan 2024 09:48:34 -0800 Subject: [PATCH 4/7] Ensure V128/256/512 defers to the next smaller vector size by operating on the lower/upper halves --- .../System/Runtime/Intrinsics/Vector128.cs | 24 +++++++------------ .../System/Runtime/Intrinsics/Vector256.cs | 24 +++++++------------ .../System/Runtime/Intrinsics/Vector512.cs | 10 ++++++-- 3 
files changed, 24 insertions(+), 34 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index db3e30f6d96d5..d7c75215dc4af 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -1426,20 +1426,6 @@ public static bool EqualsAny(Vector128 left, Vector128 right) || Vector64.EqualsAny(left._upper, right._upper); } - internal static Vector128 Exp(Vector128 vector) - where T : IExponentialFunctions - { - Unsafe.SkipInit(out Vector128 result); - - for (int index = 0; index < Vector128.Count; index++) - { - T value = T.Exp(vector.GetElement(index)); - result.SetElementUnsafe(index, value); - } - - return result; - } - /// Computes the Exp of each element in a vector. /// The vector that will have its Exp computed. /// A vector whose elements are the Exp of the elements in . 
@@ -1451,7 +1437,10 @@ public static Vector128 Exp(Vector128 vector) } else { - return Exp(vector); + return Create( + Vector64.Exp(vector._lower), + Vector64.Exp(vector._upper) + ); } } @@ -1466,7 +1455,10 @@ public static Vector128 Exp(Vector128 vector) } else { - return Exp(vector); + return Create( + Vector64.Exp(vector._lower), + Vector64.Exp(vector._upper) + ); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 25de2fc09fc5e..583d0b1eda980 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1402,20 +1402,6 @@ public static bool EqualsAny(Vector256 left, Vector256 right) || Vector128.EqualsAny(left._upper, right._upper); } - internal static Vector256 Exp(Vector256 vector) - where T : IExponentialFunctions - { - Unsafe.SkipInit(out Vector256 result); - - for (int index = 0; index < Vector256.Count; index++) - { - T value = T.Exp(vector.GetElement(index)); - result.SetElementUnsafe(index, value); - } - - return result; - } - /// Computes the Exp of each element in a vector. /// The vector that will have its Exp computed. /// A vector whose elements are the Exp of the elements in .
@@ -1427,7 +1413,10 @@ public static Vector256 Exp(Vector256 vector) } else { - return Exp(vector); + return Create( + Vector128.Exp(vector._lower), + Vector128.Exp(vector._upper) + ); } } @@ -1442,7 +1431,10 @@ public static Vector256 Exp(Vector256 vector) } else { - return Exp(vector); + return Create( + Vector128.Exp(vector._lower), + Vector128.Exp(vector._upper) + ); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index c314043b00714..3376c64d581c5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -1478,7 +1478,10 @@ public static Vector512 Exp(Vector512 vector) } else { - return Exp(vector); + return Create( + Vector256.Exp(vector._lower), + Vector256.Exp(vector._upper) + ); } } @@ -1493,7 +1496,10 @@ public static Vector512 Exp(Vector512 vector) } else { - return Exp(vector); + return Create( + Vector256.Exp(vector._lower), + Vector256.Exp(vector._upper) + ); } } From a5909ef834e072fe5ae8d32a8a781b323a268721 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 18 Jan 2024 09:51:42 -0800 Subject: [PATCH 5/7] Ensure the right allowedVariance amounts are used for the vectorized Exp(float) tests --- .../tests/Vectors/VectorTestMemberData.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs index cc732b6b5978d..1579fa54e6af9 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/VectorTestMemberData.cs @@ -90,17 +90,17 @@ public static IEnumerable ExpSingle yield return new object[] { -1.0f, 0.367879441f, 
SingleCrossPlatformMachineEpsilon }; yield return new object[] { -0.785398163f, 0.455938128f, SingleCrossPlatformMachineEpsilon }; // value: -(pi / 4) yield return new object[] { -0.707106781f, 0.493068691f, SingleCrossPlatformMachineEpsilon }; // value: -(1 / sqrt(2)) - yield return new object[] { -0.693147181f, 0.5f, 0.0f }; // value: -(ln(2)) + yield return new object[] { -0.693147181f, 0.5f, SingleCrossPlatformMachineEpsilon }; // value: -(ln(2)) yield return new object[] { -0.636619772f, 0.529077808f, SingleCrossPlatformMachineEpsilon }; // value: -(2 / pi) yield return new object[] { -0.434294482f, 0.647721485f, SingleCrossPlatformMachineEpsilon }; // value: -(log10(e)) yield return new object[] { -0.318309886f, 0.727377349f, SingleCrossPlatformMachineEpsilon }; // value: -(1 / pi) - yield return new object[] { -0.0f, 1.0f, 0.0f }; + yield return new object[] { -0.0f, 1.0f, SingleCrossPlatformMachineEpsilon * 10 }; yield return new object[] { float.NaN, float.NaN, 0.0f }; - yield return new object[] { 0.0f, 1.0f, 0.0f }; + yield return new object[] { 0.0f, 1.0f, SingleCrossPlatformMachineEpsilon * 10 }; yield return new object[] { 0.318309886f, 1.37480223f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (1 / pi) yield return new object[] { 0.434294482f, 1.54387344f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (log10(e)) yield return new object[] { 0.636619772f, 1.89008116f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (2 / pi) - yield return new object[] { 0.693147181f, 2.0f, 0.0f }; // value: (ln(2)) + yield return new object[] { 0.693147181f, 2.0f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (ln(2)) yield return new object[] { 0.707106781f, 2.02811498f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (1 / sqrt(2)) yield return new object[] { 0.785398163f, 2.19328005f, SingleCrossPlatformMachineEpsilon * 10 }; // value: (pi / 4) yield return new object[] { 1.0f, 2.71828183f, SingleCrossPlatformMachineEpsilon * 10 }; // 
expected: (e) From 7b04a9234994cb9a986e56be33fb39932a57f43b Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 18 Jan 2024 11:00:03 -0800 Subject: [PATCH 6/7] Ensure we call Exp and that the methods are properly inlined --- .../System/Runtime/Intrinsics/Vector128.cs | 22 ++++++++------- .../System/Runtime/Intrinsics/Vector256.cs | 14 ++++++---- .../System/Runtime/Intrinsics/Vector512.cs | 28 ++++++------------- .../src/System/Runtime/Intrinsics/Vector64.cs | 16 +++++++---- 4 files changed, 39 insertions(+), 41 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index d7c75215dc4af..0e54cf778d8e7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -1426,9 +1426,8 @@ public static bool EqualsAny(Vector128 left, Vector128 right) || Vector64.EqualsAny(left._upper, right._upper); } - /// Computes the Exp of each element in a vector. - /// The vector that will have its Exp computed. - /// A vector whose elements are the Exp of the elements in . + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Exp(Vector128 vector) { if (IsHardwareAccelerated) @@ -1438,15 +1437,14 @@ public static Vector128 Exp(Vector128 vector) else { return Create( - Vector64.Log(vector._lower), - Vector64.Log(vector._upper) + Vector64.Exp(vector._lower), + Vector64.Exp(vector._upper) ); } } - /// Computes the Exp of each element in a vector. - /// The vector that will have its Exp computed. - /// A vector whose elements are the Exp of the elements in . 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Exp(Vector128 vector) { if (IsHardwareAccelerated) @@ -1456,8 +1454,8 @@ public static Vector128 Exp(Vector128 vector) else { return Create( - Vector64.Log(vector._lower), - Vector64.Log(vector._upper) + Vector64.Exp(vector._lower), + Vector64.Exp(vector._upper) ); } } @@ -1818,6 +1816,7 @@ internal static Vector128 LoadUnsafe(ref char source, nuint elementOffse LoadUnsafe(ref Unsafe.As(ref source), elementOffset); /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Log(Vector128 vector) { if (IsHardwareAccelerated) @@ -1834,6 +1833,7 @@ public static Vector128 Log(Vector128 vector) } /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Log(Vector128 vector) { if (IsHardwareAccelerated) @@ -1850,6 +1850,7 @@ public static Vector128 Log(Vector128 vector) } /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Log2(Vector128 vector) { if (IsHardwareAccelerated) @@ -1866,6 +1867,7 @@ public static Vector128 Log2(Vector128 vector) } /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Log2(Vector128 vector) { if (IsHardwareAccelerated) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 583d0b1eda980..bcbd4a219a0f7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1402,9 +1402,8 @@ public static bool EqualsAny(Vector256 left, Vector256 right) || Vector128.EqualsAny(left._upper, right._upper); } - /// Computes the Exp of each element in a vector. - /// The vector that will have its Exp computed. - /// A vector whose elements are the Exp of the elements in . 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Exp(Vector256 vector) { if (IsHardwareAccelerated) @@ -1420,9 +1419,8 @@ public static Vector256 Exp(Vector256 vector) } } - /// Computes the Exp of each element in a vector. - /// The vector that will have its Exp computed. - /// A vector whose elements are the Exp of the elements in . + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Exp(Vector256 vector) { if (IsHardwareAccelerated) @@ -1792,6 +1790,7 @@ internal static Vector256 LoadUnsafe(ref char source, nuint elementOffse LoadUnsafe(ref Unsafe.As(ref source), elementOffset); /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Log(Vector256 vector) { if (IsHardwareAccelerated) @@ -1808,6 +1807,7 @@ public static Vector256 Log(Vector256 vector) } /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Log(Vector256 vector) { if (IsHardwareAccelerated) @@ -1824,6 +1824,7 @@ public static Vector256 Log(Vector256 vector) } /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Log2(Vector256 vector) { if (IsHardwareAccelerated) @@ -1840,6 +1841,7 @@ public static Vector256 Log2(Vector256 vector) } /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Log2(Vector256 vector) { if (IsHardwareAccelerated) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 3376c64d581c5..95e26012af9ba 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -1453,23 +1453,8 @@ public static bool EqualsAny(Vector512 left, Vector512 right) || Vector256.EqualsAny(left._upper, right._upper); } - internal static Vector512 Exp(Vector512 vector) - where T : 
IExponentialFunctions - { - Unsafe.SkipInit(out Vector512 result); - - for (int index = 0; index < Vector512.Count; index++) - { - T value = T.Exp(vector.GetElement(index)); - result.SetElementUnsafe(index, value); - } - - return result; - } - - /// Computes the Exp of each element in a vector. - /// The vector that will have its Exp computed. - /// A vector whose elements are the Exp of the elements in . + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Exp(Vector512 vector) { if (IsHardwareAccelerated) @@ -1485,9 +1470,8 @@ public static Vector512 Exp(Vector512 vector) } } - /// Computes the Exp of each element in a vector. - /// The vector that will have its Exp computed. - /// A vector whose elements are the Exp of the elements in . + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Exp(Vector512 vector) { if (IsHardwareAccelerated) @@ -1857,6 +1841,7 @@ internal static Vector512 LoadUnsafe(ref char source, nuint elementOffse LoadUnsafe(ref Unsafe.As(ref source), elementOffset); /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Log(Vector512 vector) { if (IsHardwareAccelerated) @@ -1873,6 +1858,7 @@ public static Vector512 Log(Vector512 vector) } /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Log(Vector512 vector) { if (IsHardwareAccelerated) @@ -1889,6 +1875,7 @@ public static Vector512 Log(Vector512 vector) } /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Log2(Vector512 vector) { if (IsHardwareAccelerated) @@ -1905,6 +1892,7 @@ public static Vector512 Log2(Vector512 vector) } /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Log2(Vector512 vector) { if (IsHardwareAccelerated) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 
7acaed2a15b0b..d4789a9a0a85b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -1170,9 +1170,10 @@ internal static Vector64 Exp(Vector64 vector) return result; } - /// Computes the Exp of each element in a vector. + /// Computes the exp of each element in a vector. /// The vector that will have its Exp computed. - /// A vector whose elements are the Exp of the elements in . + /// A vector whose elements are the exp of the elements in . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 Exp(Vector64 vector) { if (IsHardwareAccelerated) @@ -1185,9 +1186,10 @@ public static Vector64 Exp(Vector64 vector) } } - /// Computes the Exp of each element in a vector. - /// The vector that will have its Exp computed. - /// A vector whose elements are the Exp of the elements in . + /// Computes the exp of each element in a vector. + /// The vector that will have its exp computed. + /// A vector whose elements are the exp of the elements in . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 Exp(Vector64 vector) { if (IsHardwareAccelerated) @@ -1632,6 +1634,7 @@ internal static Vector64 Log(Vector64 vector) /// Computes the log of each element in a vector. /// The vector that will have its log computed. /// A vector whose elements are the log of the elements in . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 Log(Vector64 vector) { if (IsHardwareAccelerated) @@ -1647,6 +1650,7 @@ public static Vector64 Log(Vector64 vector) /// Computes the log of each element in a vector. /// The vector that will have its log computed. /// A vector whose elements are the log of the elements in . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 Log(Vector64 vector) { if (IsHardwareAccelerated) @@ -1676,6 +1680,7 @@ internal static Vector64 Log2(Vector64 vector) /// Computes the log2 of each element in a vector. /// The vector that will have its log2 computed. /// A vector whose elements are the log2 of the elements in . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 Log2(Vector64 vector) { if (IsHardwareAccelerated) @@ -1691,6 +1696,7 @@ public static Vector64 Log2(Vector64 vector) /// Computes the log2 of each element in a vector. /// The vector that will have its log2 computed. /// A vector whose elements are the log2 of the elements in . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 Log2(Vector64 vector) { if (IsHardwareAccelerated) From 398c78eb528fc67596d35398c2cb9fdc530b3c99 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 18 Jan 2024 12:57:28 -0800 Subject: [PATCH 7/7] Skip the Exp test for Vector128/256/512 on Mono due to https://github.com/dotnet/runtime/issues/97176 --- .../System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs | 2 ++ .../System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs | 2 ++ .../System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs | 2 ++ 3 files changed, 6 insertions(+) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs index a81e6a51b81e0..eec2134daf6e6 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs @@ -4689,6 +4689,7 @@ private static void TestGetOne() [Theory] [MemberData(nameof(VectorTestMemberData.ExpDouble), MemberType = typeof(VectorTestMemberData))] + [SkipOnMono("https://github.com/dotnet/runtime/issues/97176")] public void ExpDoubleTest(double value, double expectedResult, double variance) 
{ Vector128 actualResult = Vector128.Exp(Vector128.Create(value)); @@ -4697,6 +4698,7 @@ public void ExpDoubleTest(double value, double expectedResult, double variance) [Theory] [MemberData(nameof(VectorTestMemberData.ExpSingle), MemberType = typeof(VectorTestMemberData))] + [SkipOnMono("https://github.com/dotnet/runtime/issues/97176")] public void ExpSingleTest(float value, float expectedResult, float variance) { Vector128 actualResult = Vector128.Exp(Vector128.Create(value)); diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs index 70e6eb2cee76b..3272693a10fed 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs @@ -5704,6 +5704,7 @@ private static void TestGetOne() [Theory] [MemberData(nameof(VectorTestMemberData.ExpDouble), MemberType = typeof(VectorTestMemberData))] + [SkipOnMono("https://github.com/dotnet/runtime/issues/97176")] public void ExpDoubleTest(double value, double expectedResult, double variance) { Vector256 actualResult = Vector256.Exp(Vector256.Create(value)); @@ -5712,6 +5713,7 @@ public void ExpDoubleTest(double value, double expectedResult, double variance) [Theory] [MemberData(nameof(VectorTestMemberData.ExpSingle), MemberType = typeof(VectorTestMemberData))] + [SkipOnMono("https://github.com/dotnet/runtime/issues/97176")] public void ExpSingleTest(float value, float expectedResult, float variance) { Vector256 actualResult = Vector256.Exp(Vector256.Create(value)); diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs index 580d67bb84315..9192aecfb54c4 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs @@ -5136,6 
+5136,7 @@ private static void TestIsNotSupported() [Theory] [MemberData(nameof(VectorTestMemberData.ExpDouble), MemberType = typeof(VectorTestMemberData))] + [SkipOnMono("https://github.com/dotnet/runtime/issues/97176")] public void ExpDoubleTest(double value, double expectedResult, double variance) { Vector512 actualResult = Vector512.Exp(Vector512.Create(value)); @@ -5144,6 +5145,7 @@ public void ExpDoubleTest(double value, double expectedResult, double variance) [Theory] [MemberData(nameof(VectorTestMemberData.ExpSingle), MemberType = typeof(VectorTestMemberData))] + [SkipOnMono("https://github.com/dotnet/runtime/issues/97176")] public void ExpSingleTest(float value, float expectedResult, float variance) { Vector512 actualResult = Vector512.Exp(Vector512.Create(value));