diff --git a/g3doc/quick_reference.md b/g3doc/quick_reference.md
index 8220e9b718..2ee3dad3a9 100644
--- a/g3doc/quick_reference.md
+++ b/g3doc/quick_reference.md
@@ -1050,6 +1050,9 @@ types, and on SVE/RVV.
 
 *   V **AndNot**(V a, V b): returns `~a[i] & b[i]`.
 
+*   V **MaskedOrOrZero**(M m, V a, V b): returns `a[i] | b[i]`
+    or `0` if `m[i]` is false.
+
 The following three-argument functions may be more efficient than assembling
 them from 2-argument functions:
 
@@ -2237,6 +2240,22 @@
     must be in the range `[0, 2 * Lanes(d))` but need not be unique. The index
     type `TI` must be an integer of the same size as `TFromD<D>`.
 
+*   V **TableLookupLanesOr**(M m, V a, V b, unspecified) returns the
+    result of `TableLookupLanes(a, unspecified)` where `m[i]` is true, and
+    returns `b[i]` where `m[i]` is false.
+
+*   V **TableLookupLanesOrZero**(M m, V a, unspecified) returns
+    the result of `TableLookupLanes(a, unspecified)` where `m[i]` is true, and
+    returns zero where `m[i]` is false.
+
+*   V **TwoTablesLookupLanesOr**(D d, M m, V a, V b, unspecified)
+    returns the result of `TwoTablesLookupLanes(d, a, b, unspecified)` where
+    `m[i]` is true, and `a[i]` where `m[i]` is false.
+
+*   V **TwoTablesLookupLanesOrZero**(D d, M m, V a, V b, unspecified)
+    returns the result of `TwoTablesLookupLanes(d, a, b, unspecified)` where
+    `m[i]` is true, and zero where `m[i]` is false.
+
 *   V **Per4LaneBlockShuffle**<size_t kIdx3, size_t kIdx2, size_t kIdx1,
     size_t kIdx0>(V v) does a per 4-lane block shuffle of `v` if
     `Lanes(DFromV<V>())` is greater than or equal to 4 or a shuffle of the
@@ -2377,6 +2396,24 @@ more efficient on some targets.
 *   T **ReduceMin**(D, V v): returns the minimum of all lanes.
 *   T **ReduceMax**(D, V v): returns the maximum of all lanes.
 
+### Masked reductions
+
+**Note**: Horizontal operations (across lanes of the same vector) such as
+reductions are slower than normal SIMD operations and are typically used
+outside critical loops.
+
+All ops in this section ignore lanes where `mask=false`. These are equivalent
+to, and potentially more efficient than, `GetLane(SumOfLanes(d,
+IfThenElseZero(m, v)))` etc. The result is implementation-defined when all mask
+elements are false.
+
+*   T **MaskedReduceSum**(D, M m, V v): returns the sum of all lanes
+    where `m[i]` is `true`.
+*   T **MaskedReduceMin**(D, M m, V v): returns the minimum of all
+    lanes where `m[i]` is `true`.
+*   T **MaskedReduceMax**(D, M m, V v): returns the maximum of all
+    lanes where `m[i]` is `true`.
+
 ### Crypto
 
 Ops in this section are only available if `HWY_TARGET != HWY_SCALAR`:
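Not part of the patch, but to make the intended use of the ops documented above concrete, here is a minimal sketch (names such as `SumOfOredPrefix` are invented; it assumes static dispatch and that `x`/`y` provide at least `Lanes(d)` readable elements):

```cpp
#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// Returns the sum over lanes [0, count) of x[i] | y[i].
uint32_t SumOfOredPrefix(const uint32_t* HWY_RESTRICT x,
                         const uint32_t* HWY_RESTRICT y, size_t count) {
  const hn::ScalableTag<uint32_t> d;
  // Enables lanes [0, count); count is assumed to be <= Lanes(d) here.
  const auto m = hn::FirstN(d, count);
  const auto ored = hn::MaskedOrOrZero(m, hn::LoadU(d, x), hn::LoadU(d, y));
  // Lanes outside the mask were zeroed, and MaskedReduceSum ignores them too.
  return hn::MaskedReduceSum(d, m, ored);
}
```

On SVE this should map to the predicated ORR and masked reductions added in arm_sve-inl.h below; other targets fall back to the generic_ops-inl.h implementations.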
diff --git a/hwy/ops/arm_sve-inl.h b/hwy/ops/arm_sve-inl.h
index 2dde1479de..66ad1dfe3b 100644
--- a/hwy/ops/arm_sve-inl.h
+++ b/hwy/ops/arm_sve-inl.h
@@ -219,6 +219,15 @@ HWY_SVE_FOREACH_BF16_UNCONDITIONAL(HWY_SPECIALIZE, _, _)
   HWY_API HWY_SVE_V(BASE, BITS) NAME(HWY_SVE_V(BASE, BITS) v) {            \
     return sv##OP##_##CHAR##BITS(v);                                       \
   }
+#define HWY_SVE_RETV_ARGMV_M(BASE, CHAR, BITS, HALF, NAME, OP)             \
+  HWY_API HWY_SVE_V(BASE, BITS)                                            \
+      NAME(svbool_t m, HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b) { \
+    return sv##OP##_##CHAR##BITS##_m(b, m, a);                             \
+  }
+#define HWY_SVE_RETV_ARGMV_Z(BASE, CHAR, BITS, HALF, NAME, OP)              \
+  HWY_API HWY_SVE_V(BASE, BITS) NAME(svbool_t m, HWY_SVE_V(BASE, BITS) a) { \
+    return sv##OP##_##CHAR##BITS##_z(m, a);                                 \
+  }
 
 // vector = f(vector, scalar), e.g. detail::AddN
 #define HWY_SVE_RETV_ARGPVN(BASE, CHAR, BITS, HALF, NAME, OP) \
@@ -252,6 +261,17 @@ HWY_SVE_FOREACH_BF16_UNCONDITIONAL(HWY_SPECIALIZE, _, _)
   NAME(svbool_t m, HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b) {     \
     return sv##OP##_##CHAR##BITS##_x(m, a, b);                             \
   }
+#define HWY_SVE_RETV_ARGMVV_M(BASE, CHAR, BITS, HALF, NAME, OP)            \
+  HWY_API HWY_SVE_V(BASE, BITS)                                            \
+      NAME(svbool_t m, HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b) { \
+    return sv##OP##_##CHAR##BITS##_m(m, a, b);                             \
+  }
+// User-specified mask. Mask=false value is zero.
+#define HWY_SVE_RETV_ARGMVVZ(BASE, CHAR, BITS, HALF, NAME, OP)             \
+  HWY_API HWY_SVE_V(BASE, BITS)                                            \
+      NAME(svbool_t m, HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b) { \
+    return sv##OP##_##CHAR##BITS##_z(m, a, b);                             \
+  }
 
 #define HWY_SVE_RETV_ARGVVV(BASE, CHAR, BITS, HALF, NAME, OP) \
   HWY_API HWY_SVE_V(BASE, BITS)                               \
@@ -260,6 +280,13 @@ HWY_SVE_FOREACH_BF16_UNCONDITIONAL(HWY_SPECIALIZE, _, _)
     return sv##OP##_##CHAR##BITS(a, b, c);                    \
   }
 
+#define HWY_SVE_RETV_ARGMVVV(BASE, CHAR, BITS, HALF, NAME, OP)             \
+  HWY_API HWY_SVE_V(BASE, BITS)                                            \
+      NAME(svbool_t m, HWY_SVE_V(BASE, BITS) a, HWY_SVE_V(BASE, BITS) b,   \
+           HWY_SVE_V(BASE, BITS) c) {                                      \
+    return sv##OP##_##CHAR##BITS##_m(m, a, b, c);                          \
+  }
+
 // ------------------------------ Lanes
 
 namespace detail {
@@ -727,6 +754,9 @@ HWY_API V Or(const V a, const V b) {
   return BitCast(df, Or(BitCast(du, a), BitCast(du, b)));
 }
 
+// ------------------------------ MaskedOrOrZero
+HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGMVVZ, MaskedOrOrZero, orr)
+
 // ------------------------------ Xor
 
 namespace detail {
@@ -3288,6 +3318,25 @@ HWY_API TFromD<D> ReduceMax(D d, VFromD<D> v) {
   return detail::MaxOfLanesM(detail::MakeMask(d), v);
 }
 
+#ifdef HWY_NATIVE_MASKED_REDUCE_SCALAR
+#undef HWY_NATIVE_MASKED_REDUCE_SCALAR
+#else
+#define HWY_NATIVE_MASKED_REDUCE_SCALAR
+#endif
+
+template <class D, class M>
+HWY_API TFromD<D> MaskedReduceSum(D /*d*/, M m, VFromD<D> v) {
+  return detail::SumOfLanesM(m, v);
+}
+template <class D, class M>
+HWY_API TFromD<D> MaskedReduceMin(D /*d*/, M m, VFromD<D> v) {
+  return detail::MinOfLanesM(m, v);
+}
+template <class D, class M>
+HWY_API TFromD<D> MaskedReduceMax(D /*d*/, M m, VFromD<D> v) {
+  return detail::MaxOfLanesM(m, v);
+}
+
 // ------------------------------ SumOfLanes
 
 template
@@ -4755,6 +4804,23 @@ HWY_API V IfNegativeThenElse(V v, V yes, V no) {
   static_assert(IsSigned<TFromV<V>>(), "Only works for signed/float");
   return IfThenElse(IsNegative(v), yes, no);
 }
+// ------------------------------ IfNegativeThenNegOrUndefIfZero
+
+#ifdef HWY_NATIVE_INTEGER_IF_NEGATIVE_THEN_NEG
+#undef HWY_NATIVE_INTEGER_IF_NEGATIVE_THEN_NEG
+#else
+#define HWY_NATIVE_INTEGER_IF_NEGATIVE_THEN_NEG
+#endif
+
+#define HWY_SVE_NEG_IF(BASE, CHAR, BITS, HALF, NAME, OP)           \
+  HWY_API HWY_SVE_V(BASE, BITS)                                    \
+      NAME(HWY_SVE_V(BASE, BITS) mask, HWY_SVE_V(BASE, BITS) v) {  \
+    return sv##OP##_##CHAR##BITS##_m(v, IsNegative(mask), v);      \
+  }
+
+HWY_SVE_FOREACH_IF(HWY_SVE_NEG_IF, IfNegativeThenNegOrUndefIfZero, neg)
+
+#undef HWY_SVE_NEG_IF
 
 // ------------------------------ AverageRound (ShiftRight)
 
@@ -6291,13 +6357,19 @@ HWY_API V HighestSetBitIndex(V v) {
 #undef HWY_SVE_IF_NOT_EMULATED_D
 #undef HWY_SVE_PTRUE
 #undef HWY_SVE_RETV_ARGMVV
+#undef HWY_SVE_RETV_ARGMVVZ
 #undef HWY_SVE_RETV_ARGPV
 #undef HWY_SVE_RETV_ARGPVN
 #undef HWY_SVE_RETV_ARGPVV
 #undef HWY_SVE_RETV_ARGV
 #undef HWY_SVE_RETV_ARGVN
+#undef HWY_SVE_RETV_ARGMV
+#undef HWY_SVE_RETV_ARGMV_M
+#undef HWY_SVE_RETV_ARGMV_Z
 #undef HWY_SVE_RETV_ARGVV
+#undef HWY_SVE_RETV_ARGMVV_M
 #undef HWY_SVE_RETV_ARGVVV
+#undef HWY_SVE_RETV_ARGMVVV
 #undef HWY_SVE_T
 #undef HWY_SVE_UNDEFINED
 #undef HWY_SVE_V
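For orientation only (not part of the patch): because the `sv*_z` intrinsics use zeroing predication, the new `HWY_SVE_FOREACH_UI(HWY_SVE_RETV_ARGMVVZ, MaskedOrOrZero, orr)` line should expand, for the u32 lane type, to roughly:

```cpp
HWY_API svuint32_t MaskedOrOrZero(svbool_t m, svuint32_t a, svuint32_t b) {
  // _z suffix: zeroing predication, lanes where m is false become 0.
  return svorr_u32_z(m, a, b);
}
```

Merge predication (`_m`, used by the new `*_M` macros) would instead keep the inactive lanes from the operand passed through.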
diff --git a/hwy/ops/generic_ops-inl.h b/hwy/ops/generic_ops-inl.h
index 99b518d99c..efee4bc971 100644
--- a/hwy/ops/generic_ops-inl.h
+++ b/hwy/ops/generic_ops-inl.h
@@ -882,6 +882,28 @@ HWY_API TFromD<D> ReduceMax(D d, VFromD<D> v) {
 }
 #endif  // HWY_NATIVE_REDUCE_MINMAX_4_UI8
 
+#if (defined(HWY_NATIVE_MASKED_REDUCE_SCALAR) == defined(HWY_TARGET_TOGGLE))
+#ifdef HWY_NATIVE_MASKED_REDUCE_SCALAR
+#undef HWY_NATIVE_MASKED_REDUCE_SCALAR
+#else
+#define HWY_NATIVE_MASKED_REDUCE_SCALAR
+#endif
+
+template <class D, class M>
+HWY_API TFromD<D> MaskedReduceSum(D d, M m, VFromD<D> v) {
+  return ReduceSum(d, IfThenElseZero(m, v));
+}
+template <class D, class M>
+HWY_API TFromD<D> MaskedReduceMin(D d, M m, VFromD<D> v) {
+  return ReduceMin(d, IfThenElse(m, v, MaxOfLanes(d, v)));
+}
+template <class D, class M>
+HWY_API TFromD<D> MaskedReduceMax(D d, M m, VFromD<D> v) {
+  return ReduceMax(d, IfThenElse(m, v, MinOfLanes(d, v)));
+}
+
+#endif  // HWY_NATIVE_MASKED_REDUCE_SCALAR
+
 // ------------------------------ IsEitherNaN
 #if (defined(HWY_NATIVE_IS_EITHER_NAN) == defined(HWY_TARGET_TOGGLE))
 #ifdef HWY_NATIVE_IS_EITHER_NAN
@@ -6444,6 +6466,30 @@ HWY_API V ReverseBits(V v) {
 }
 #endif  // HWY_NATIVE_REVERSE_BITS_UI16_32_64
 
+// ------------------------------ TableLookupLanesOr
+template <class V, class M>
+HWY_API V TableLookupLanesOr(M m, V a, V b, IndicesFromD<DFromV<V>> idx) {
+  return IfThenElse(m, TableLookupLanes(a, idx), b);
+}
+
+// ------------------------------ TableLookupLanesOrZero
+template <class V, class M>
+HWY_API V TableLookupLanesOrZero(M m, V a, IndicesFromD<DFromV<V>> idx) {
+  return IfThenElseZero(m, TableLookupLanes(a, idx));
+}
+
+// ------------------------------ TwoTablesLookupLanesOr
+template <class D, class M, class V>
+HWY_API V TwoTablesLookupLanesOr(D d, M m, V a, V b, IndicesFromD<D> idx) {
+  return IfThenElse(m, TwoTablesLookupLanes(d, a, b, idx), a);
+}
+
+// ------------------------------ TwoTablesLookupLanesOrZero
+template <class D, class M, class V>
+HWY_API V TwoTablesLookupLanesOrZero(D d, M m, V a, V b, IndicesFromD<D> idx) {
+  return IfThenElse(m, TwoTablesLookupLanes(d, a, b, idx), Zero(d));
+}
+
 // ------------------------------ Per4LaneBlockShuffle
 
 #if (defined(HWY_NATIVE_PER4LANEBLKSHUF_DUP32) == defined(HWY_TARGET_TOGGLE))
@@ -7299,6 +7345,10 @@ HWY_API V BitShuffle(V v, VI idx) {
 
 #endif  // HWY_NATIVE_BITSHUFFLE
 
+template <class V, class M>
+HWY_API V MaskedOrOrZero(M m, V a, V b) {
+  return IfThenElseZero(m, Or(a, b));
+}
 // ================================================== Operator wrapper
 
 // SVE* and RVV currently cannot define operators and have already defined
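To illustrate the new table-lookup wrappers defined above, a sketch (not part of the patch; `ReverseWhere` is an invented name): reverse the lane order where the mask is true and keep `fallback` elsewhere.

```cpp
#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// result[i] = m[i] ? v[N-1-i] : fallback[i]
template <class D>
hn::Vec<D> ReverseWhere(D d, hn::Mask<D> m, hn::Vec<D> v, hn::Vec<D> fallback) {
  const hn::RebindToSigned<D> di;
  using TI = hn::TFromD<decltype(di)>;
  // indices[i] = N - 1 - i, i.e. a full reversal of the lane order.
  const auto rev =
      hn::Sub(hn::Set(di, static_cast<TI>(hn::Lanes(d) - 1)), hn::Iota(di, 0));
  const auto idx = hn::IndicesFromVec(d, rev);
  // Masked-off lanes keep fallback[i] rather than a shuffled value.
  return hn::TableLookupLanesOr(m, v, fallback, idx);
}
```

Highway already has `Reverse(d, v)`; the point here is only to show the masked fallback behavior of the new op.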
diff --git a/hwy/tests/logical_test.cc b/hwy/tests/logical_test.cc
index ecd7589c9e..5abc2277bc 100644
--- a/hwy/tests/logical_test.cc
+++ b/hwy/tests/logical_test.cc
@@ -146,6 +146,28 @@ HWY_NOINLINE void TestAllTestBit() {
   ForIntegerTypes(ForPartialVectors<TestTestBit>());
 }
 
+struct TestMaskedOrOrZero {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const MFromD<D> all_true = MaskTrue(d);
+    const auto v1 = Iota(d, 1);
+    const auto v2 = Iota(d, 2);
+
+    HWY_ASSERT_VEC_EQ(d, Or(v2, v1), MaskedOrOrZero(all_true, v1, v2));
+
+    const MFromD<D> first_five = FirstN(d, 5);
+    const Vec<D> v0 = Zero(d);
+
+    const Vec<D> v1_exp = IfThenElse(first_five, Or(v2, v1), v0);
+
+    HWY_ASSERT_VEC_EQ(d, v1_exp, MaskedOrOrZero(first_five, v1, v2));
+  }
+};
+
+HWY_NOINLINE void TestAllMaskedLogical() {
+  ForAllTypes(ForPartialVectors<TestMaskedOrOrZero>());
+}
+
 }  // namespace
 // NOLINTNEXTLINE(google-readability-namespace-comments)
 }  // namespace HWY_NAMESPACE
@@ -159,6 +181,7 @@ HWY_BEFORE_TEST(HwyLogicalTest);
 HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllNot);
 HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllLogical);
 HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllTestBit);
+HWY_EXPORT_AND_TEST_P(HwyLogicalTest, TestAllMaskedLogical);
 HWY_AFTER_TEST();
 }  // namespace
 }  // namespace hwy
diff --git a/hwy/tests/reduction_test.cc b/hwy/tests/reduction_test.cc
index fffc4a7873..fd35f645f6 100644
--- a/hwy/tests/reduction_test.cc
+++ b/hwy/tests/reduction_test.cc
@@ -352,6 +352,122 @@ HWY_NOINLINE void TestAllSumsOf8() {
   ForGEVectors<64, TestSumsOf8>()(uint8_t());
 }
 
+struct TestMaskedReduceSum {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+
+    const Vec<D> v2 = Iota(d, 2);
+
+    const size_t N = Lanes(d);
+    auto bool_lanes = AllocateAligned<T>(N);
+    HWY_ASSERT(bool_lanes);
+
+    for (size_t rep = 0; rep < AdjustedReps(200); ++rep) {
+      T expected = 0;
+      for (size_t i = 0; i < N; ++i) {
+        bool_lanes[i] = (Random32(&rng) & 1024) ? T(1) : T(0);
+        if (bool_lanes[i]) {
+          expected += ConvertScalarTo<T>(i + 2);
+        }
+      }
+
+      const Vec<D> mask_i = Load(d, bool_lanes.get());
+      const Mask<D> mask = RebindMask(d, Gt(mask_i, Zero(d)));
+
+      // If all elements are disabled the result is implementation-defined
+      if (AllFalse(d, mask)) {
+        continue;
+      }
+
+      HWY_ASSERT_EQ(expected, MaskedReduceSum(d, mask, v2));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllMaskedReduceSum() {
+  ForAllTypes(ForPartialVectors<TestMaskedReduceSum>());
+}
+
+struct TestMaskedReduceMin {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+
+    const Vec<D> v2 = Iota(d, 2);
+
+    const size_t N = Lanes(d);
+    auto bool_lanes = AllocateAligned<T>(N);
+    HWY_ASSERT(bool_lanes);
+
+    for (size_t rep = 0; rep < AdjustedReps(200); ++rep) {
+      T expected =
+          ConvertScalarTo<T>(N + 3);  // larger than any value in the vector
+      for (size_t i = 0; i < N; ++i) {
+        bool_lanes[i] = (Random32(&rng) & 1024) ? T(1) : T(0);
+        if (bool_lanes[i]) {
+          if (expected > ConvertScalarTo<T>(i + 2)) {
+            expected = ConvertScalarTo<T>(i + 2);
+          }
+        }
+      }
+
+      const Vec<D> mask_i = Load(d, bool_lanes.get());
+      const Mask<D> mask = RebindMask(d, Gt(mask_i, Zero(d)));
+
+      // If all elements are disabled the result is implementation-defined
+      if (AllFalse(d, mask)) {
+        continue;
+      }
+
+      HWY_ASSERT_EQ(expected, MaskedReduceMin(d, mask, v2));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllMaskedReduceMin() {
+  ForAllTypes(ForPartialVectors<TestMaskedReduceMin>());
+}
+
+struct TestMaskedReduceMax {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+
+    const Vec<D> v2 = Iota(d, 2);
+
+    const size_t N = Lanes(d);
+    auto bool_lanes = AllocateAligned<T>(N);
+    HWY_ASSERT(bool_lanes);
+
+    for (size_t rep = 0; rep < AdjustedReps(200); ++rep) {
+      T expected = 0;
+      for (size_t i = 0; i < N; ++i) {
+        bool_lanes[i] = (Random32(&rng) & 1024) ? T(1) : T(0);
+        if (bool_lanes[i]) {
+          if (expected < ConvertScalarTo<T>(i + 2)) {
+            expected = ConvertScalarTo<T>(i + 2);
+          }
+        }
+      }
+
+      const Vec<D> mask_i = Load(d, bool_lanes.get());
+      const Mask<D> mask = RebindMask(d, Gt(mask_i, Zero(d)));
+
+      // If all elements are disabled the result is implementation-defined
+      if (AllFalse(d, mask)) {
+        continue;
+      }
+
+      HWY_ASSERT_EQ(expected, MaskedReduceMax(d, mask, v2));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllMaskedReduceMax() {
+  ForAllTypes(ForPartialVectors<TestMaskedReduceMax>());
+}
+
 }  // namespace
 // NOLINTNEXTLINE(google-readability-namespace-comments)
 }  // namespace HWY_NAMESPACE
@@ -367,6 +483,10 @@ HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllMinMaxOfLanes);
 HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumsOf2);
 HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumsOf4);
 HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumsOf8);
+
+HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllMaskedReduceSum);
+HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllMaskedReduceMin);
+HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllMaskedReduceMax);
 HWY_AFTER_TEST();
 }  // namespace
 }  // namespace hwy
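The three reduction tests above build their random mask the same way; as a sketch (not part of the patch; `RandomMask` is an invented helper and `std::mt19937` stands in for the tests' `RandomState`), the pattern is roughly:

```cpp
#include <random>

#include "hwy/aligned_allocator.h"
#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// Each lane is enabled with probability ~0.5: random 0/1 lane values are
// compared against zero. Callers skip the all-false case because the masked
// reductions are implementation-defined there.
template <class D>
hn::Mask<D> RandomMask(D d, std::mt19937& rng) {
  using T = hn::TFromD<D>;
  const size_t N = hn::Lanes(d);
  auto lanes = hwy::AllocateAligned<T>(N);
  for (size_t i = 0; i < N; ++i) {
    lanes[i] = (rng() & 1024) ? T(1) : T(0);
  }
  return hn::Gt(hn::Load(d, lanes.get()), hn::Zero(d));
}
```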
diff --git a/hwy/tests/table_test.cc b/hwy/tests/table_test.cc
index 09fdd7eaf6..eb5b1a8644 100644
--- a/hwy/tests/table_test.cc
+++ b/hwy/tests/table_test.cc
@@ -103,6 +103,59 @@ HWY_NOINLINE void TestAllTableLookupLanes() {
   ForAllTypes(ForPartialVectors<TestTableLookupLanes>());
 }
 
+struct TestTableLookupLanesOr {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+#if HWY_TARGET != HWY_SCALAR
+    const RebindToSigned<D> di;
+    using TI = TFromD<decltype(di)>;
+
+    const size_t N = Lanes(d);
+    // Select indices from N-1 counting down
+    auto indices = IndicesFromVec(
+        d, Sub(Set(di, ConvertScalarTo<TI>(N - 1)), Iota(di, 0)));
+
+    auto expected = AllocateAligned<T>(N);
+    auto expected_zero = AllocateAligned<T>(N);
+    auto bool_lanes = AllocateAligned<T>(N);
+    HWY_ASSERT(expected && expected_zero && bool_lanes);
+
+    const auto v1 = Iota(d, 5);
+    const auto v2 = Iota(d, 8);
+
+    RandomState rng;
+
+    for (size_t rep = 0; rep < AdjustedReps(200); ++rep) {
+      for (size_t i = 0; i < N; ++i) {
+        bool_lanes[i] = (Random32(&rng) & 1024) ? T(1) : T(0);
+
+        if (bool_lanes[i]) {
+          expected[i] = ConvertScalarTo<T>(N - i + 5 - 1);  // v1[N-1, N-2, ...]
+          expected_zero[i] =
+              ConvertScalarTo<T>(N - i + 5 - 1);  // v1[N-1, N-2, ...]
+        } else {
+          expected[i] = ConvertScalarTo<T>(i + 8);  // v2[i]
+          expected_zero[i] = ConvertScalarTo<T>(0);
+        }
+      }
+
+      const Vec<D> mask_i = Load(d, bool_lanes.get());
+      const Mask<D> mask = RebindMask(d, Gt(mask_i, Zero(d)));
+      HWY_ASSERT_VEC_EQ(d, expected.get(),
+                        TableLookupLanesOr(mask, v1, v2, indices));
+      HWY_ASSERT_VEC_EQ(d, expected_zero.get(),
+                        TableLookupLanesOrZero(mask, v1, indices));
+    }
+#else
+    (void)d;
+#endif
+  }
+};
+
+HWY_NOINLINE void TestAllTableLookupLanesOr() {
+  ForAllTypes(ForPartialVectors<TestTableLookupLanesOr>());
+}
+
 struct TestTwoTablesLookupLanes {
   template <typename T, class D>
   HWY_NOINLINE void operator()(T /*unused*/, D d) {
@@ -194,6 +247,64 @@ HWY_NOINLINE void TestAllTwoTablesLookupLanes() {
   ForAllTypes(ForPartialVectors<TestTwoTablesLookupLanes>());
 }
 
+struct TestTwoTablesLookupLanesOr {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const RebindToSigned<D> di;
+    using TI = TFromD<decltype(di)>;
+
+    const size_t N = Lanes(d);
+    // Select indices from N-1 counting down
+    auto idx_lower = Sub(Set(di, ConvertScalarTo<TI>(N - 1)), Iota(di, 0));
+    auto idx_upper = Add(idx_lower, Set(di, ConvertScalarTo<TI>(N)));
+    auto indices = IndicesFromVec(d, OddEven(idx_upper, idx_lower));
+
+    auto expected = AllocateAligned<T>(N);
+    auto expected_zero = AllocateAligned<T>(N);
+    auto bool_lanes = AllocateAligned<T>(N);
+    HWY_ASSERT(expected && expected_zero && bool_lanes);
+
+    const auto v1 = Iota(d, 5);
+    const auto v2 = Iota(d, 8);
+
+    RandomState rng;
+
+    for (size_t rep = 0; rep < AdjustedReps(200); ++rep) {
+      for (size_t i = 0; i < N; ++i) {
+        bool_lanes[i] = (Random32(&rng) & 1024) ? T(1) : T(0);
+
+        if (bool_lanes[i]) {
+          if (i % 2) {
+            expected[i] =
+                ConvertScalarTo<T>(N - i + 8 - 1);  // v2[N-1, N-2, ...]
+            expected_zero[i] =
+                ConvertScalarTo<T>(N - i + 8 - 1);  // v2[N-1, N-2, ...]
+          } else {
+            expected[i] =
+                ConvertScalarTo<T>(N - i + 5 - 1);  // v1[N-1, N-2, ...]
+            expected_zero[i] =
+                ConvertScalarTo<T>(N - i + 5 - 1);  // v1[N-1, N-2, ...]
+          }
+        } else {
+          expected[i] = ConvertScalarTo<T>(i + 5);  // v1[i]
+          expected_zero[i] = ConvertScalarTo<T>(0);
+        }
+      }
+
+      const Vec<D> mask_i = Load(d, bool_lanes.get());
+      const Mask<D> mask = RebindMask(d, Gt(mask_i, Zero(d)));
+      HWY_ASSERT_VEC_EQ(d, expected.get(),
+                        TwoTablesLookupLanesOr(d, mask, v1, v2, indices));
+      HWY_ASSERT_VEC_EQ(d, expected_zero.get(),
+                        TwoTablesLookupLanesOrZero(d, mask, v1, v2, indices));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllTwoTablesLookupLanesOr() {
+  ForAllTypes(ForPartialVectors<TestTwoTablesLookupLanesOr>());
+}
+
 }  // namespace
 // NOLINTNEXTLINE(google-readability-namespace-comments)
 }  // namespace HWY_NAMESPACE
@@ -205,7 +316,9 @@ namespace hwy {
 namespace {
 HWY_BEFORE_TEST(HwyTableTest);
 HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTableLookupLanes);
+HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTableLookupLanesOr);
 HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTwoTablesLookupLanes);
+HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTwoTablesLookupLanesOr);
 HWY_AFTER_TEST();
 }  // namespace
 }  // namespace hwy
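Finally, since the documentation above leaves the all-false-mask case implementation-defined, callers will likely want a guard along these lines (sketch only, not part of the patch; `MinOfSelected` and `if_none` are invented names):

```cpp
#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// Minimum of the lanes of v selected by m, or if_none if no lane is selected.
template <class D>
hn::TFromD<D> MinOfSelected(D d, hn::Mask<D> m, hn::Vec<D> v,
                            hn::TFromD<D> if_none) {
  // MaskedReduceMin is implementation-defined for an all-false mask.
  if (hn::AllFalse(d, m)) return if_none;
  return hn::MaskedReduceMin(d, m, v);
}
```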