Skip to content

Commit

Permalink
AArch64: add support for newer Apple CPUs
Browse files Browse the repository at this point in the history
They're roughly ARMv8.6. This works in the .td file, but in
AArch64TargetParser.def, marking them v8.6 brings in support for the SM4
cryptographic hash and we don't actually have that. So TargetParser side
they're marked as v8.5, with the extra features (BF16 and I8MM added manually).

Finally, A16 supports the HCX extension in addition to v8.6. This has no
TargetParser implications.
  • Loading branch information
TNorthover committed Sep 22, 2022
1 parent e030be6 commit 677da09
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 5 deletions.
4 changes: 2 additions & 2 deletions clang/test/Misc/target-invalid-cpu-note.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@

// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64
// AARCH64: error: unknown target CPU 'not-a-cpu'
// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}

// RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64
// TUNE_AARCH64: error: unknown target CPU 'not-a-cpu'
// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}
// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1{{$}}

// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
// X86: error: unknown target CPU 'not-a-cpu'
Expand Down
9 changes: 9 additions & 0 deletions llvm/include/llvm/Support/AArch64TargetParser.def
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,17 @@ AARCH64_CPU_NAME("apple-a13", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
AARCH64_CPU_NAME("apple-a14", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
AARCH64_CPU_NAME("apple-a15", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM))
AARCH64_CPU_NAME("apple-a16", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM))
AARCH64_CPU_NAME("apple-m1", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3))
AARCH64_CPU_NAME("apple-m2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM))
AARCH64_CPU_NAME("apple-s4", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16))
AARCH64_CPU_NAME("apple-s5", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false,
Expand Down
49 changes: 47 additions & 2 deletions llvm/lib/Target/AArch64/AArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,38 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
FeatureZCRegMove,
FeatureZCZeroing]>;

def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
"Apple A15", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
FeatureDisableLatencySchedHeuristic,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseArithmeticLogic,
FeatureFuseCCSelect,
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureZCRegMove,
FeatureZCZeroing
]>;

def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
"Apple A16", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
FeatureDisableLatencySchedHeuristic,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseArithmeticLogic,
FeatureFuseCCSelect,
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureZCRegMove,
FeatureZCZeroing
]>;

def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
[FeatureExynosCheapAsMoveHandling,
Expand Down Expand Up @@ -1072,6 +1104,13 @@ def ProcessorFeatures {
FeaturePredRes, FeatureCacheDeepPersist,
FeatureFullFP16, FeatureFP16FML, FeatureSHA3,
FeatureAltFPCmp];
list<SubtargetFeature> AppleA15 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML];
list<SubtargetFeature> AppleA16 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML,
FeatureHCX];
list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
FeaturePerfMon];
list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
Expand Down Expand Up @@ -1229,10 +1268,16 @@ def : ProcessorModel<"apple-a13", CycloneModel, ProcessorFeatures.AppleA13,
[TuneAppleA13]>;
def : ProcessorModel<"apple-a14", CycloneModel, ProcessorFeatures.AppleA14,
[TuneAppleA14]>;
def : ProcessorModel<"apple-a15", CycloneModel, ProcessorFeatures.AppleA15,
[TuneAppleA15]>;
def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16,
[TuneAppleA16]>;

// Mac CPUs
def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,
[TuneAppleA14]>;
def : ProcessorModel<"apple-m2", CycloneModel, ProcessorFeatures.AppleA15,
[TuneAppleA15]>;

// watch CPUs.
def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12,
Expand All @@ -1241,8 +1286,8 @@ def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12,
[TuneAppleA12]>;

// Alias for the latest Apple processor model supported by LLVM.
def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA14,
[TuneAppleA14]>;
def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA16,
[TuneAppleA16]>;

// Fujitsu A64FX
def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ void AArch64Subtarget::initializeProperties() {
case AppleA12:
case AppleA13:
case AppleA14:
case AppleA15:
case AppleA16:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
AppleA12,
AppleA13,
AppleA14,
AppleA15,
AppleA16,
Carmel,
CortexA35,
CortexA53,
Expand Down
29 changes: 28 additions & 1 deletion llvm/unittests/Support/TargetParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,24 @@ INSTANTIATE_TEST_SUITE_P(
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_FP16FML | AArch64::AEK_SHA3,
"8.5-A"),
ARMCPUTestParams("apple-a15", "armv8.5-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
AArch64::AEK_FP | AArch64::AEK_SIMD |
AArch64::AEK_LSE | AArch64::AEK_RAS |
AArch64::AEK_RDM | AArch64::AEK_RCPC |
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM,
"8.5-A"),
ARMCPUTestParams("apple-a16", "armv8.5-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
AArch64::AEK_FP | AArch64::AEK_SIMD |
AArch64::AEK_LSE | AArch64::AEK_RAS |
AArch64::AEK_RDM | AArch64::AEK_RCPC |
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM,
"8.5-A"),
ARMCPUTestParams("apple-m1", "armv8.5-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
AArch64::AEK_FP | AArch64::AEK_SIMD |
Expand All @@ -1120,6 +1138,15 @@ INSTANTIATE_TEST_SUITE_P(
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_FP16FML | AArch64::AEK_SHA3,
"8.5-A"),
ARMCPUTestParams("apple-m2", "armv8.5-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
AArch64::AEK_FP | AArch64::AEK_SIMD |
AArch64::AEK_LSE | AArch64::AEK_RAS |
AArch64::AEK_RDM | AArch64::AEK_RCPC |
AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
AArch64::AEK_BF16 | AArch64::AEK_I8MM,
"8.5-A"),
ARMCPUTestParams("apple-s4", "armv8.3-a", "crypto-neon-fp-armv8",
AArch64::AEK_CRC | AArch64::AEK_CRYPTO |
AArch64::AEK_FP | AArch64::AEK_SIMD |
Expand Down Expand Up @@ -1257,7 +1284,7 @@ INSTANTIATE_TEST_SUITE_P(
AArch64::AEK_LSE | AArch64::AEK_RDM,
"8.2-A")));

static constexpr unsigned NumAArch64CPUArchs = 54;
static constexpr unsigned NumAArch64CPUArchs = 57;

TEST(TargetParserTest, testAArch64CPUArchList) {
SmallVector<StringRef, NumAArch64CPUArchs> List;
Expand Down

0 comments on commit 677da09

Please sign in to comment.