Skip to content

Commit

Permalink
ukernels: stop abusing signless as signed (#15338)
Browse files Browse the repository at this point in the history
In MLIR we are chronically abusing signless types as signed. Ongoing
discussion at #15241, etc. At least in ukernels we are insulated enough
from the compiler code that we don't have to replicate the same
confusion. So, just because `linalg` named matmul ops want `int8 x int8
-> int32` matmuls to be represented as `i8 x i8 -> i32` even though
these are signless types and this is silently interpreting these as
signed, we didn't have to let that lower to a `_i8i8i32` ukernel, we
could instead more properly represent that as a `_s8s8s32` ukernel.

Fixing this now paves the way for unsigned LHS/RHS support in ukernels,
a step in #15158, without adding more tech debt.

The bulk of this PR is just a regex substitution:

```
find runtime/src/ compiler/src/ -type f | xargs sed -i 's/IREE_UK_FLAG_MMT4D_TYPE_I8I8I32/IREE_UK_FLAG_MMT4D_TYPE_S8S8S32/g;s/iree_uk_mmt4d_type_i8i8i32/iree_uk_mmt4d_type_s8s8s32/g;s/iree_uk_mmt4d_tile_i8i8i32/iree_uk_mmt4d_tile_s8s8s32/g;s/iree_mmt4d_reference_innerloop_i8i8i32/iree_mmt4d_reference_innerloop_s8s8s32/g'
```
  • Loading branch information
bjacob authored Oct 30, 2023
1 parent aeb38b0 commit 41e7289
Show file tree
Hide file tree
Showing 22 changed files with 240 additions and 180 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ matchDAGForUKernel(RewriterBase &rewriter, linalg::Mmt4DOp op,
uint32_t flags = 0;
if (lhsElemType.isSignlessInteger(8) && rhsElemType.isSignlessInteger(8) &&
outElemType.isSignlessInteger(32)) {
flags = IREE_UK_FLAG_MMT4D_TYPE_I8I8I32;
flags = IREE_UK_FLAG_MMT4D_TYPE_S8S8S32;
} else if (lhsElemType.isF32() && rhsElemType.isF32() &&
outElemType.isF32()) {
flags = IREE_UK_FLAG_MMT4D_TYPE_F32F32F32;
Expand Down
12 changes: 6 additions & 6 deletions runtime/src/iree/builtins/ukernel/arch/arm_64/mmt4d_arm_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ IREE_UK_MMT4D_TILE_FUNC_IMPL_FOR_M0_1_2_4_8(
iree_uk_mmt4d_tile_f16f16f16_4x8x1_arm_64,
iree_uk_mmt4d_tile_f16f16f16_8x8x1_arm_64)

static inline void iree_uk_mmt4d_tile_i8i8i32_1x8x1_to_8x8x1_arm_64(
static inline void iree_uk_mmt4d_tile_s8s8s32_1x8x1_to_8x8x1_arm_64(
void* IREE_UK_RESTRICT out_tile, const void* IREE_UK_RESTRICT lhs_panel,
const void* IREE_UK_RESTRICT rhs_panel,
const iree_uk_mmt4d_params_t* params, int M0) {
Expand Down Expand Up @@ -262,8 +262,8 @@ static inline void iree_uk_mmt4d_tile_i8i8i32_1x8x1_to_8x8x1_arm_64(
}

IREE_UK_MMT4D_TILE_FUNC_IMPL_FOR_M0_1_2_4_8(
iree_uk_mmt4d_tile_i8i8i32_1x8x1_to_8x8x1_arm_64,
iree_uk_mmt4d_tile_i8i8i32_1x8x1_arm_64,
iree_uk_mmt4d_tile_i8i8i32_2x8x1_arm_64,
iree_uk_mmt4d_tile_i8i8i32_4x8x1_arm_64,
iree_uk_mmt4d_tile_i8i8i32_8x8x1_arm_64)
iree_uk_mmt4d_tile_s8s8s32_1x8x1_to_8x8x1_arm_64,
iree_uk_mmt4d_tile_s8s8s32_1x8x1_arm_64,
iree_uk_mmt4d_tile_s8s8s32_2x8x1_arm_64,
iree_uk_mmt4d_tile_s8s8s32_4x8x1_arm_64,
iree_uk_mmt4d_tile_s8s8s32_8x8x1_arm_64)
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "iree/builtins/ukernel/arch/arm_64/common_arm_64.h"
#include "iree/builtins/ukernel/arch/arm_64/mmt4d_arm_64_internal.h"

static inline void iree_uk_mmt4d_tile_i8i8i32_1x8x4_to_8x8x4_arm_64_dotprod(
static inline void iree_uk_mmt4d_tile_s8s8s32_1x8x4_to_8x8x4_arm_64_dotprod(
void* IREE_UK_RESTRICT out_tile, const void* IREE_UK_RESTRICT lhs_panel,
const void* IREE_UK_RESTRICT rhs_panel,
const iree_uk_mmt4d_params_t* params, int M0) {
Expand Down Expand Up @@ -71,8 +71,8 @@ static inline void iree_uk_mmt4d_tile_i8i8i32_1x8x4_to_8x8x4_arm_64_dotprod(
}

IREE_UK_MMT4D_TILE_FUNC_IMPL_FOR_M0_1_2_4_8(
iree_uk_mmt4d_tile_i8i8i32_1x8x4_to_8x8x4_arm_64_dotprod,
iree_uk_mmt4d_tile_i8i8i32_1x8x4_arm_64_dotprod,
iree_uk_mmt4d_tile_i8i8i32_2x8x4_arm_64_dotprod,
iree_uk_mmt4d_tile_i8i8i32_4x8x4_arm_64_dotprod,
iree_uk_mmt4d_tile_i8i8i32_8x8x4_arm_64_dotprod)
iree_uk_mmt4d_tile_s8s8s32_1x8x4_to_8x8x4_arm_64_dotprod,
iree_uk_mmt4d_tile_s8s8s32_1x8x4_arm_64_dotprod,
iree_uk_mmt4d_tile_s8s8s32_2x8x4_arm_64_dotprod,
iree_uk_mmt4d_tile_s8s8s32_4x8x4_arm_64_dotprod,
iree_uk_mmt4d_tile_s8s8s32_8x8x4_arm_64_dotprod)
Original file line number Diff line number Diff line change
Expand Up @@ -75,32 +75,32 @@ static iree_uk_mmt4d_tile_func_t
iree_uk_mmt4d_select_tile_func_arm_64_f16f16f16_M0x8x1(
const iree_uk_mmt4d_params_t* params) {
#ifdef IREE_UK_BUILD_ARM_64_FP16
if (iree_uk_cpu_supports_fp16(params->cpu_data)) {
switch (params->M0) {
case 1:
return iree_uk_mmt4d_tile_f16f16f16_1x8x1_arm_64_fp16;
case 2:
return iree_uk_mmt4d_tile_f16f16f16_2x8x1_arm_64_fp16;
case 4:
return iree_uk_mmt4d_tile_f16f16f16_4x8x1_arm_64_fp16;
case 8:
return iree_uk_mmt4d_tile_f16f16f16_8x8x1_arm_64_fp16;
}
if (iree_uk_cpu_supports_fp16(params->cpu_data)) {
switch (params->M0) {
case 1:
return iree_uk_mmt4d_tile_f16f16f16_1x8x1_arm_64_fp16;
case 2:
return iree_uk_mmt4d_tile_f16f16f16_2x8x1_arm_64_fp16;
case 4:
return iree_uk_mmt4d_tile_f16f16f16_4x8x1_arm_64_fp16;
case 8:
return iree_uk_mmt4d_tile_f16f16f16_8x8x1_arm_64_fp16;
}
}
#endif
if (params->flags & IREE_UK_FLAG_MMT4D_SKIP_INTERMEDIATE_ROUNDINGS) {
switch (params->M0) {
case 1:
return iree_uk_mmt4d_tile_f16f16f16_1x8x1_arm_64;
case 2:
return iree_uk_mmt4d_tile_f16f16f16_2x8x1_arm_64;
case 4:
return iree_uk_mmt4d_tile_f16f16f16_4x8x1_arm_64;
case 8:
return iree_uk_mmt4d_tile_f16f16f16_8x8x1_arm_64;
}
if (params->flags & IREE_UK_FLAG_MMT4D_SKIP_INTERMEDIATE_ROUNDINGS) {
switch (params->M0) {
case 1:
return iree_uk_mmt4d_tile_f16f16f16_1x8x1_arm_64;
case 2:
return iree_uk_mmt4d_tile_f16f16f16_2x8x1_arm_64;
case 4:
return iree_uk_mmt4d_tile_f16f16f16_4x8x1_arm_64;
case 8:
return iree_uk_mmt4d_tile_f16f16f16_8x8x1_arm_64;
}
return 0;
}
return 0;
}

static iree_uk_mmt4d_tile_func_t
Expand All @@ -116,20 +116,20 @@ static iree_uk_mmt4d_tile_func_t
iree_uk_mmt4d_select_tile_func_arm_64_bf16bf16f32_M0x8x4(
const iree_uk_mmt4d_params_t* params) {
#ifdef IREE_UK_BUILD_ARM_64_BF16
if (iree_uk_cpu_supports_bf16(params->cpu_data)) {
switch (params->M0) {
case 1:
return iree_uk_mmt4d_tile_bf16bf16f32_1x8x4_arm_64_bf16;
case 2:
return iree_uk_mmt4d_tile_bf16bf16f32_2x8x4_arm_64_bf16;
case 4:
return iree_uk_mmt4d_tile_bf16bf16f32_4x8x4_arm_64_bf16;
case 8:
return iree_uk_mmt4d_tile_bf16bf16f32_8x8x4_arm_64_bf16;
}
if (iree_uk_cpu_supports_bf16(params->cpu_data)) {
switch (params->M0) {
case 1:
return iree_uk_mmt4d_tile_bf16bf16f32_1x8x4_arm_64_bf16;
case 2:
return iree_uk_mmt4d_tile_bf16bf16f32_2x8x4_arm_64_bf16;
case 4:
return iree_uk_mmt4d_tile_bf16bf16f32_4x8x4_arm_64_bf16;
case 8:
return iree_uk_mmt4d_tile_bf16bf16f32_8x8x4_arm_64_bf16;
}
}
#endif
return 0;
return 0;
}

static iree_uk_mmt4d_tile_func_t
Expand All @@ -152,13 +152,13 @@ iree_uk_mmt4d_select_tile_func_arm_64_i8i8i32_M0x8x1(
const iree_uk_mmt4d_params_t* params) {
switch (params->M0) {
case 1:
return iree_uk_mmt4d_tile_i8i8i32_1x8x1_arm_64;
return iree_uk_mmt4d_tile_s8s8s32_1x8x1_arm_64;
case 2:
return iree_uk_mmt4d_tile_i8i8i32_2x8x1_arm_64;
return iree_uk_mmt4d_tile_s8s8s32_2x8x1_arm_64;
case 4:
return iree_uk_mmt4d_tile_i8i8i32_4x8x1_arm_64;
return iree_uk_mmt4d_tile_s8s8s32_4x8x1_arm_64;
case 8:
return iree_uk_mmt4d_tile_i8i8i32_8x8x1_arm_64;
return iree_uk_mmt4d_tile_s8s8s32_8x8x1_arm_64;
}
return 0;
}
Expand All @@ -170,13 +170,13 @@ iree_uk_mmt4d_select_tile_func_arm_64_i8i8i32_M0x8x4(
if (iree_uk_cpu_supports_dotprod(params->cpu_data)) {
switch (params->M0) {
case 1:
return iree_uk_mmt4d_tile_i8i8i32_1x8x4_arm_64_dotprod;
return iree_uk_mmt4d_tile_s8s8s32_1x8x4_arm_64_dotprod;
case 2:
return iree_uk_mmt4d_tile_i8i8i32_2x8x4_arm_64_dotprod;
return iree_uk_mmt4d_tile_s8s8s32_2x8x4_arm_64_dotprod;
case 4:
return iree_uk_mmt4d_tile_i8i8i32_4x8x4_arm_64_dotprod;
return iree_uk_mmt4d_tile_s8s8s32_4x8x4_arm_64_dotprod;
case 8:
return iree_uk_mmt4d_tile_i8i8i32_8x8x4_arm_64_dotprod;
return iree_uk_mmt4d_tile_s8s8s32_8x8x4_arm_64_dotprod;
}
}
#endif
Expand All @@ -190,13 +190,13 @@ iree_uk_mmt4d_select_tile_func_arm_64_i8i8i32_M0x8x8(
if (iree_uk_cpu_supports_i8mm(params->cpu_data)) {
switch (params->M0) {
case 1:
return iree_uk_mmt4d_tile_i8i8i32_1x8x8_arm_64_i8mm;
return iree_uk_mmt4d_tile_s8s8s32_1x8x8_arm_64_i8mm;
case 2:
return iree_uk_mmt4d_tile_i8i8i32_2x8x8_arm_64_i8mm;
return iree_uk_mmt4d_tile_s8s8s32_2x8x8_arm_64_i8mm;
case 4:
return iree_uk_mmt4d_tile_i8i8i32_4x8x8_arm_64_i8mm;
return iree_uk_mmt4d_tile_s8s8s32_4x8x8_arm_64_i8mm;
case 8:
return iree_uk_mmt4d_tile_i8i8i32_8x8x8_arm_64_i8mm;
return iree_uk_mmt4d_tile_s8s8s32_8x8x8_arm_64_i8mm;
}
}
#endif
Expand Down Expand Up @@ -230,7 +230,7 @@ iree_uk_mmt4d_tile_func_t iree_uk_mmt4d_select_tile_func_arch(
return iree_uk_mmt4d_select_tile_func_arm_64_bf16bf16f32(params);
case iree_uk_mmt4d_type_bf16bf16bf16:
return iree_uk_mmt4d_select_tile_func_arm_64_bf16bf16bf16(params);
case iree_uk_mmt4d_type_i8i8i32:
case iree_uk_mmt4d_type_s8s8s32:
return iree_uk_mmt4d_select_tile_func_arm_64_i8i8i32(params);
default:
IREE_UK_ASSUME_UNREACHABLE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ static inline int32x4_t iree_uk_neon_uzp2_s32_as_s64(int32x4_t a, int32x4_t b) {
vuzp2q_s64(vreinterpretq_s64_s32(a), vreinterpretq_s64_s32(b)));
}

void iree_uk_mmt4d_tile_i8i8i32_1x8x8_to_8x8x8_arm_64_i8mm(
void iree_uk_mmt4d_tile_s8s8s32_1x8x8_to_8x8x8_arm_64_i8mm(
void* IREE_UK_RESTRICT out_tile, const void* IREE_UK_RESTRICT lhs_panel,
const void* IREE_UK_RESTRICT rhs_panel,
const iree_uk_mmt4d_params_t* params, int M0) {
Expand Down Expand Up @@ -99,8 +99,8 @@ void iree_uk_mmt4d_tile_i8i8i32_1x8x8_to_8x8x8_arm_64_i8mm(
}

IREE_UK_MMT4D_TILE_FUNC_IMPL_FOR_M0_1_2_4_8(
iree_uk_mmt4d_tile_i8i8i32_1x8x8_to_8x8x8_arm_64_i8mm,
iree_uk_mmt4d_tile_i8i8i32_1x8x8_arm_64_i8mm,
iree_uk_mmt4d_tile_i8i8i32_2x8x8_arm_64_i8mm,
iree_uk_mmt4d_tile_i8i8i32_4x8x8_arm_64_i8mm,
iree_uk_mmt4d_tile_i8i8i32_8x8x8_arm_64_i8mm)
iree_uk_mmt4d_tile_s8s8s32_1x8x8_to_8x8x8_arm_64_i8mm,
iree_uk_mmt4d_tile_s8s8s32_1x8x8_arm_64_i8mm,
iree_uk_mmt4d_tile_s8s8s32_2x8x8_arm_64_i8mm,
iree_uk_mmt4d_tile_s8s8s32_4x8x8_arm_64_i8mm,
iree_uk_mmt4d_tile_s8s8s32_8x8x8_arm_64_i8mm)
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@ IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_bf16bf16f32_1x8x4_arm_64_bf16)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_bf16bf16f32_2x8x4_arm_64_bf16)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_bf16bf16f32_4x8x4_arm_64_bf16)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_bf16bf16f32_8x8x4_arm_64_bf16)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_1x8x1_arm_64)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_2x8x1_arm_64)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_4x8x1_arm_64)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_8x8x1_arm_64)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_1x8x4_arm_64_dotprod)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_2x8x4_arm_64_dotprod)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_4x8x4_arm_64_dotprod)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_8x8x4_arm_64_dotprod)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_1x8x8_arm_64_i8mm)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_2x8x8_arm_64_i8mm)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_4x8x8_arm_64_i8mm)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_i8i8i32_8x8x8_arm_64_i8mm)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_1x8x1_arm_64)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_2x8x1_arm_64)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_4x8x1_arm_64)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_8x8x1_arm_64)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_1x8x4_arm_64_dotprod)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_2x8x4_arm_64_dotprod)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_4x8x4_arm_64_dotprod)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_8x8x4_arm_64_dotprod)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_1x8x8_arm_64_i8mm)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_2x8x8_arm_64_i8mm)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_4x8x8_arm_64_i8mm)
IREE_UK_MMT4D_TILE_FUNC_DECL(iree_uk_mmt4d_tile_s8s8s32_8x8x8_arm_64_i8mm)

#endif // IREE_BUILTINS_UKERNEL_ARCH_ARM_64_MMT4D_ARM_64_INTERNAL_H_
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ IREE_UK_MMT4D_TILE_FUNC_IMPL_FOR_M0_1_2_4_8(
iree_uk_mmt4d_tile_f16f16f16_4x8x1_x86_64_avx2_fma,
iree_uk_mmt4d_tile_f16f16f16_8x8x1_x86_64_avx2_fma)

static inline void iree_uk_mmt4d_tile_i8i8i32_1x8x2_to_8x8x2_x86_64_avx2_fma(
static inline void iree_uk_mmt4d_tile_s8s8s32_1x8x2_to_8x8x2_x86_64_avx2_fma(
void* IREE_UK_RESTRICT out_tile, const void* IREE_UK_RESTRICT lhs_panel,
const void* IREE_UK_RESTRICT rhs_panel,
const iree_uk_mmt4d_params_t* params, int M0) {
Expand Down Expand Up @@ -216,8 +216,8 @@ static inline void iree_uk_mmt4d_tile_i8i8i32_1x8x2_to_8x8x2_x86_64_avx2_fma(
}

IREE_UK_MMT4D_TILE_FUNC_IMPL_FOR_M0_1_2_4_8(
iree_uk_mmt4d_tile_i8i8i32_1x8x2_to_8x8x2_x86_64_avx2_fma,
iree_uk_mmt4d_tile_i8i8i32_1x8x2_x86_64_avx2_fma,
iree_uk_mmt4d_tile_i8i8i32_2x8x2_x86_64_avx2_fma,
iree_uk_mmt4d_tile_i8i8i32_4x8x2_x86_64_avx2_fma,
iree_uk_mmt4d_tile_i8i8i32_8x8x2_x86_64_avx2_fma)
iree_uk_mmt4d_tile_s8s8s32_1x8x2_to_8x8x2_x86_64_avx2_fma,
iree_uk_mmt4d_tile_s8s8s32_1x8x2_x86_64_avx2_fma,
iree_uk_mmt4d_tile_s8s8s32_2x8x2_x86_64_avx2_fma,
iree_uk_mmt4d_tile_s8s8s32_4x8x2_x86_64_avx2_fma,
iree_uk_mmt4d_tile_s8s8s32_8x8x2_x86_64_avx2_fma)
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ IREE_UK_MMT4D_TILE_FUNC_IMPL_FOR_M0_1_2_4_8_16(
iree_uk_mmt4d_tile_f16f16f16_16x16x1_x86_64_avx512_base)

static inline void
iree_uk_mmt4d_tile_i8i8i32_1x16x2_to_16x16x2_x86_64_avx512_base(
iree_uk_mmt4d_tile_s8s8s32_1x16x2_to_16x16x2_x86_64_avx512_base(
void* IREE_UK_RESTRICT out_tile, const void* IREE_UK_RESTRICT lhs_panel,
const void* IREE_UK_RESTRICT rhs_panel,
const iree_uk_mmt4d_params_t* params, int M0) {
Expand Down Expand Up @@ -300,9 +300,9 @@ iree_uk_mmt4d_tile_i8i8i32_1x16x2_to_16x16x2_x86_64_avx512_base(
}

IREE_UK_MMT4D_TILE_FUNC_IMPL_FOR_M0_1_2_4_8_16(
iree_uk_mmt4d_tile_i8i8i32_1x16x2_to_16x16x2_x86_64_avx512_base,
iree_uk_mmt4d_tile_i8i8i32_1x16x2_x86_64_avx512_base,
iree_uk_mmt4d_tile_i8i8i32_2x16x2_x86_64_avx512_base,
iree_uk_mmt4d_tile_i8i8i32_4x16x2_x86_64_avx512_base,
iree_uk_mmt4d_tile_i8i8i32_8x16x2_x86_64_avx512_base,
iree_uk_mmt4d_tile_i8i8i32_16x16x2_x86_64_avx512_base)
iree_uk_mmt4d_tile_s8s8s32_1x16x2_to_16x16x2_x86_64_avx512_base,
iree_uk_mmt4d_tile_s8s8s32_1x16x2_x86_64_avx512_base,
iree_uk_mmt4d_tile_s8s8s32_2x16x2_x86_64_avx512_base,
iree_uk_mmt4d_tile_s8s8s32_4x16x2_x86_64_avx512_base,
iree_uk_mmt4d_tile_s8s8s32_8x16x2_x86_64_avx512_base,
iree_uk_mmt4d_tile_s8s8s32_16x16x2_x86_64_avx512_base)
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "iree/builtins/ukernel/arch/x86_64/mmt4d_x86_64_internal.h"

static inline void
iree_uk_mmt4d_tile_i8i8i32_1x16x2_to_16x16x2_x86_64_avx512_vnni(
iree_uk_mmt4d_tile_s8s8s32_1x16x2_to_16x16x2_x86_64_avx512_vnni(
void* IREE_UK_RESTRICT out_tile, const void* IREE_UK_RESTRICT lhs_panel,
const void* IREE_UK_RESTRICT rhs_panel,
const iree_uk_mmt4d_params_t* params, int M0) {
Expand Down Expand Up @@ -129,9 +129,9 @@ iree_uk_mmt4d_tile_i8i8i32_1x16x2_to_16x16x2_x86_64_avx512_vnni(
}

IREE_UK_MMT4D_TILE_FUNC_IMPL_FOR_M0_1_2_4_8_16(
iree_uk_mmt4d_tile_i8i8i32_1x16x2_to_16x16x2_x86_64_avx512_vnni,
iree_uk_mmt4d_tile_i8i8i32_1x16x2_x86_64_avx512_vnni,
iree_uk_mmt4d_tile_i8i8i32_2x16x2_x86_64_avx512_vnni,
iree_uk_mmt4d_tile_i8i8i32_4x16x2_x86_64_avx512_vnni,
iree_uk_mmt4d_tile_i8i8i32_8x16x2_x86_64_avx512_vnni,
iree_uk_mmt4d_tile_i8i8i32_16x16x2_x86_64_avx512_vnni)
iree_uk_mmt4d_tile_s8s8s32_1x16x2_to_16x16x2_x86_64_avx512_vnni,
iree_uk_mmt4d_tile_s8s8s32_1x16x2_x86_64_avx512_vnni,
iree_uk_mmt4d_tile_s8s8s32_2x16x2_x86_64_avx512_vnni,
iree_uk_mmt4d_tile_s8s8s32_4x16x2_x86_64_avx512_vnni,
iree_uk_mmt4d_tile_s8s8s32_8x16x2_x86_64_avx512_vnni,
iree_uk_mmt4d_tile_s8s8s32_16x16x2_x86_64_avx512_vnni)
Loading

0 comments on commit 41e7289

Please sign in to comment.