Skip to content

Commit

Permalink
Rebase on top of Benoit's changes
Browse files Browse the repository at this point in the history
Terrible performance is expected
  • Loading branch information
dcaballe committed Nov 15, 2023
1 parent cda2bab commit 6c41f47
Showing 1 changed file with 18 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -163,13 +163,24 @@ enumerateMatmulTileX86_64(EncodingUser user, TypeRange elementTypes,
// reconsider when taking advantage of native f16/bf16 arithmetic when the
// accumulator itself is f16/bf16.
if (hasFeature(target, "+avx512f")) {
return {
TileMxNxK{16, 16, 1}, // Aim to use VFMADD* (zmm).
TileMxNxK{8, 16, 1}, // Truncation of the above.
TileMxNxK{4, 16, 1}, // Truncation of the above.
TileMxNxK{2, 16, 1}, // Truncation of the above.
TileMxNxK{1, 16, 1}, // Truncation of the above.
};
if (hasUkernel(target)) {
return {
TileMxNxK{16, 16, 1}, // Aim to use VFMADD* (zmm).
TileMxNxK{8, 16, 1}, // Truncation of the above.
TileMxNxK{4, 16, 1}, // Truncation of the above.
TileMxNxK{2, 16, 1}, // Truncation of the above.
TileMxNxK{1, 16, 1}, // Truncation of the above.
};
} else {
// Code generation tile sizes.
return {
TileMxNxK{16, 16, 1}, // Aim to use VFMADD* (zmm).
TileMxNxK{8, 32, 1}, // Half the M of the above, double the N.
TileMxNxK{4, 64, 1}, // Half the M of the above, double the N.
TileMxNxK{2, 64, 1}, // Half the M of the above, same N.
TileMxNxK{1, 128, 1}, // Half the M of the above, double the N.
};
}
}
if (hasFeature(target, "+avx")) {
// Note: for good performance, most +avx users will also want to add
Expand Down

0 comments on commit 6c41f47

Please sign in to comment.