Skip to content

Commit

Permalink
[FMV][AArch64] Unify features memtag and memtag2. (#112511)
Browse files Browse the repository at this point in the history
If we split these features in the compiler (see relevant pull request
#109299), we would only be able
to hand-write a 'memtag2' version using inline assembly since the
compiler cannot generate the instructions that become available with
FEAT_MTE2. However these instructions only work at Exception Level 1, so
they would be unusable since FMV is a user space facility. I am
therefore unifying them.

Approved in ACLE as ARM-software/acle#351
  • Loading branch information
labrinea authored Oct 21, 2024
1 parent 6e1a7ac commit b6e9ba0
Show file tree
Hide file tree
Showing 12 changed files with 39 additions and 40 deletions.
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2669,7 +2669,7 @@ sign. For example:

.. code-block:: c++

__attribute__((target_clones("sha2+memtag2", "fcma+sve2-pmull128")))
__attribute__((target_clones("sha2+memtag", "fcma+sve2-pmull128")))
void foo() {}

For every multiversioned function a ``default`` (fallback) implementation
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Basic/Targets/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
.Case("sme-fa64", HasSMEFA64)
.Case("sme-f16f16", HasSMEF16F16)
.Case("sme-b16b16", HasSMEB16B16)
.Cases("memtag", "memtag2", HasMTE)
.Case("memtag", HasMTE)
.Case("sb", HasSB)
.Case("predres", HasPredRes)
.Cases("ssbs", "ssbs2", HasSSBS)
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/aarch64-cpu-supports-target.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ int check_all_feature() {
return 7;
else if (__builtin_cpu_supports("sve2-bitperm+sve2-sha3+sve2-sm4"))
return 8;
else if (__builtin_cpu_supports("sme+memtag+memtag2+memtag3+sb"))
else if (__builtin_cpu_supports("sme+memtag+memtag3+sb"))
return 9;
else if (__builtin_cpu_supports("predres+ssbs+ssbs2+bti+ls64+ls64_v"))
return 10;
Expand Down
15 changes: 10 additions & 5 deletions clang/test/CodeGen/aarch64-cpu-supports.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 2
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".*"
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s

//.
// CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
// CHECK-LABEL: define dso_local i32 @main
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
//.
// CHECK-LABEL: @main(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4
Expand All @@ -17,8 +18,8 @@
// CHECK-NEXT: br label [[RETURN:%.*]]
// CHECK: if.end:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 9070970929152
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 9070970929152
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17867063951360
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17867063951360
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[IF_THEN1:%.*]], label [[IF_END2:%.*]]
// CHECK: if.then1:
Expand Down Expand Up @@ -60,3 +61,7 @@ int main(void) {

return 0;
}
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
9 changes: 3 additions & 6 deletions clang/test/CodeGen/aarch64-fmv-dependencies.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,10 @@ __attribute__((target_version("ls64"))) int fmv(void) { return 0; }
// CHECK: define dso_local i32 @fmv._Mlse() #[[lse:[0-9]+]] {
__attribute__((target_version("lse"))) int fmv(void) { return 0; }

// CHECK: define dso_local i32 @fmv._Mmemtag() #[[ATTR0:[0-9]+]] {
// CHECK: define dso_local i32 @fmv._Mmemtag() #[[memtag:[0-9]+]] {
__attribute__((target_version("memtag"))) int fmv(void) { return 0; }

// CHECK: define dso_local i32 @fmv._Mmemtag2() #[[memtag2:[0-9]+]] {
__attribute__((target_version("memtag2"))) int fmv(void) { return 0; }

// CHECK: define dso_local i32 @fmv._Mmemtag3() #[[memtag2:[0-9]+]] {
// CHECK: define dso_local i32 @fmv._Mmemtag3() #[[memtag:[0-9]+]] {
__attribute__((target_version("memtag3"))) int fmv(void) { return 0; }

// CHECK: define dso_local i32 @fmv._Mmops() #[[mops:[0-9]+]] {
Expand Down Expand Up @@ -200,7 +197,7 @@ int caller() {
// CHECK: attributes #[[jscvt]] = { {{.*}} "target-features"="+fp-armv8,+jsconv,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[ls64]] = { {{.*}} "target-features"="+fp-armv8,+ls64,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[lse]] = { {{.*}} "target-features"="+fp-armv8,+lse,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[memtag2]] = { {{.*}} "target-features"="+fp-armv8,+mte,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[memtag]] = { {{.*}} "target-features"="+fp-armv8,+mte,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[mops]] = { {{.*}} "target-features"="+fp-armv8,+mops,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[pmull]] = { {{.*}} "target-features"="+aes,+fp-armv8,+neon,+outline-atomics,+v8a"
// CHECK: attributes #[[predres]] = { {{.*}} "target-features"="+fp-armv8,+neon,+outline-atomics,+predres,+v8a"
Expand Down
20 changes: 10 additions & 10 deletions clang/test/CodeGen/attr-target-clones-aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +mte -target-feature +bti -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-MTE-BTI

int __attribute__((target_clones("lse+aes", "sve2"))) ftc(void) { return 0; }
int __attribute__((target_clones("sha2", "sha2+memtag2", " default "))) ftc_def(void) { return 1; }
int __attribute__((target_clones("sha2", "sha2+memtag", " default "))) ftc_def(void) { return 1; }
int __attribute__((target_clones("sha2", "default"))) ftc_dup1(void) { return 2; }
int __attribute__((target_clones("fp", "crc+dotprod"))) ftc_dup2(void) { return 3; }
int __attribute__((target_clones("memtag2", "bti"))) ftc_dup3(void) { return 4; }
int __attribute__((target_clones("memtag", "bti"))) ftc_dup3(void) { return 4; }
int foo() {
return ftc() + ftc_def() + ftc_dup1() + ftc_dup2() + ftc_dup3();
}
Expand Down Expand Up @@ -90,7 +90,7 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@ftc_def._Mmemtag2Msha2
// CHECK-LABEL: define {{[^@]+}}@ftc_def._MmemtagMsha2
// CHECK-SAME: () #[[ATTR3:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 1
Expand All @@ -105,7 +105,7 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK: resolver_return:
// CHECK-NEXT: ret ptr @ftc_def._Mmemtag2Msha2
// CHECK-NEXT: ret ptr @ftc_def._MmemtagMsha2
// CHECK: resolver_else:
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4096
Expand Down Expand Up @@ -176,7 +176,7 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@ftc_dup3._Mmemtag2
// CHECK-LABEL: define {{[^@]+}}@ftc_dup3._Mmemtag
// CHECK-SAME: () #[[ATTR6:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 4
Expand Down Expand Up @@ -206,7 +206,7 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK: resolver_return1:
// CHECK-NEXT: ret ptr @ftc_dup3._Mmemtag2
// CHECK-NEXT: ret ptr @ftc_dup3._Mmemtag
// CHECK: resolver_else2:
// CHECK-NEXT: ret ptr @ftc_dup3.default
//
Expand Down Expand Up @@ -547,7 +547,7 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
//
//
// CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone
// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_def._Mmemtag2Msha2
// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_def._MmemtagMsha2
// CHECK-MTE-BTI-SAME: () #[[ATTR2]] {
// CHECK-MTE-BTI-NEXT: entry:
// CHECK-MTE-BTI-NEXT: ret i32 1
Expand All @@ -562,7 +562,7 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
// CHECK-MTE-BTI: resolver_return:
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_def._Mmemtag2Msha2
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_def._MmemtagMsha2
// CHECK-MTE-BTI: resolver_else:
// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4096
Expand Down Expand Up @@ -633,7 +633,7 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
//
//
// CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone
// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup3._Mmemtag2
// CHECK-MTE-BTI-LABEL: define {{[^@]+}}@ftc_dup3._Mmemtag
// CHECK-MTE-BTI-SAME: () #[[ATTR5:[0-9]+]] {
// CHECK-MTE-BTI-NEXT: entry:
// CHECK-MTE-BTI-NEXT: ret i32 4
Expand Down Expand Up @@ -663,7 +663,7 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
// CHECK-MTE-BTI: resolver_return1:
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3._Mmemtag2
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3._Mmemtag
// CHECK-MTE-BTI: resolver_else2:
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3.default
//
Expand Down
16 changes: 8 additions & 8 deletions clang/test/CodeGen/attr-target-version.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ inline int __attribute__((target_version("rcpc+frintts"))) fmv_inline(void) { re
inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { return 4; }
inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; }
inline int __attribute__((target_version("sve2+sve2-pmull128+sve2-bitperm"))) fmv_inline(void) { return 9; }
inline int __attribute__((target_version("sve2-sm4+memtag2"))) fmv_inline(void) { return 10; }
inline int __attribute__((target_version("sve2-sm4+memtag"))) fmv_inline(void) { return 10; }
inline int __attribute__((target_version("memtag3+rcpc3+mops"))) fmv_inline(void) { return 11; }
inline int __attribute__((target_version("aes+dotprod"))) fmv_inline(void) { return 13; }
inline int __attribute__((target_version("simd+fp16fml"))) fmv_inline(void) { return 14; }
Expand Down Expand Up @@ -500,8 +500,8 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
// CHECK-NEXT: ret ptr @fmv._McrcMls64
// CHECK: resolver_else6:
// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 8796093022216
// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 8796093022216
// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17592186044424
// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17592186044424
// CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]]
// CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]]
// CHECK: resolver_return7:
Expand Down Expand Up @@ -729,7 +729,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mmemtag2Msve2-sm4
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MmemtagMsve2-sm4
// CHECK-SAME: () #[[ATTR34:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 10
Expand All @@ -751,21 +751,21 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mfp16fmlMsimd
// CHECK-SAME: () #[[ATTR4]] {
// CHECK-SAME: () #[[ATTR36:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 14
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MfpMsm4
// CHECK-SAME: () #[[ATTR36:[0-9]+]] {
// CHECK-SAME: () #[[ATTR37:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 15
//
//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MlseMrdm
// CHECK-SAME: () #[[ATTR37:[0-9]+]] {
// CHECK-SAME: () #[[ATTR38:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 16
//
Expand Down Expand Up @@ -826,7 +826,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
// CHECK-NEXT: [[TMP23:%.*]] = and i1 true, [[TMP22]]
// CHECK-NEXT: br i1 [[TMP23]], label [[RESOLVER_RETURN9:%.*]], label [[RESOLVER_ELSE10:%.*]]
// CHECK: resolver_return9:
// CHECK-NEXT: ret ptr @fmv_inline._Mmemtag2Msve2-sm4
// CHECK-NEXT: ret ptr @fmv_inline._MmemtagMsve2-sm4
// CHECK: resolver_else10:
// CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
// CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 1236950581248
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Sema/attr-target-clones-aarch64.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ int __attribute__((target_clones("rng", "fp16fml+fp", "default"))) redecl4(void)
// expected-error@+3 {{'target_clones' attribute does not match previous declaration}}
// expected-note@-2 {{previous declaration is here}}
// expected-warning@+1 {{version list contains entries that don't impact code generation}}
int __attribute__((target_clones("dgh+memtag+rpres", "ebf16+dpb", "default"))) redecl4(void) { return 1; }
int __attribute__((target_clones("dgh+rpres", "ebf16+dpb", "default"))) redecl4(void) { return 1; }

int __attribute__((target_version("flagm2"))) redef2(void) { return 1; }
// expected-error@+2 {{multiversioned function redeclarations require identical target attributes}}
Expand Down
2 changes: 1 addition & 1 deletion compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ enum CPUFeatures {
FEAT_SVE_SHA3,
FEAT_SVE_SM4,
FEAT_SME,
FEAT_MEMTAG,
RESERVED_FEAT_MEMTAG, // previously used and now ABI legacy
FEAT_MEMTAG2,
FEAT_MEMTAG3,
FEAT_SB,
Expand Down
4 changes: 1 addition & 3 deletions compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_SB);
if (hwcap & HWCAP_SSBS)
setCPUFeature(FEAT_SSBS2);
if (hwcap2 & HWCAP2_MTE) {
setCPUFeature(FEAT_MEMTAG);
if (hwcap2 & HWCAP2_MTE)
setCPUFeature(FEAT_MEMTAG2);
}
if (hwcap2 & HWCAP2_MTE3)
setCPUFeature(FEAT_MEMTAG3);
if (hwcap2 & HWCAP2_SVEAES)
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ enum CPUFeatures {
FEAT_SVE_SHA3,
FEAT_SVE_SM4,
FEAT_SME,
FEAT_MEMTAG,
RESERVED_FEAT_MEMTAG, // previously used and now ABI legacy
FEAT_MEMTAG2,
FEAT_MEMTAG3,
FEAT_SB,
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/AArch64/AArch64FMV.td
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ def : FMVExtension<"i8mm", "FEAT_I8MM", "+i8mm", 270>;
def : FMVExtension<"jscvt", "FEAT_JSCVT", "+fp-armv8,+neon,+jsconv", 210>;
def : FMVExtension<"ls64", "FEAT_LS64_ACCDATA", "+ls64", 520>;
def : FMVExtension<"lse", "FEAT_LSE", "+lse", 80>;
def : FMVExtension<"memtag", "FEAT_MEMTAG", "", 440>;
def : FMVExtension<"memtag2", "FEAT_MEMTAG2", "+mte", 450>;
def : FMVExtension<"memtag", "FEAT_MEMTAG2", "+mte", 440>;
def : FMVExtension<"memtag3", "FEAT_MEMTAG3", "+mte", 460>;
def : FMVExtension<"mops", "FEAT_MOPS", "+mops", 650>;
def : FMVExtension<"pmull", "FEAT_PMULL", "+aes,+fp-armv8,+neon", 160>;
Expand Down

0 comments on commit b6e9ba0

Please sign in to comment.