Skip to content

Commit

Permalink
[AArch64][NFC] Refactor the tail-folding option
Browse files Browse the repository at this point in the history
This patch does simple refactoring of the tail-folding
option in preparation for enabling tail-folding by
default for neoverse-v1. It adds a default tail-folding
option field to the AArch64Subtarget class that can
be set on a per-CPU basis.

Differential Revision: https://reviews.llvm.org/D149659
  • Loading branch information
david-arm committed May 17, 2023
1 parent 4241ac5 commit 7beb2ca
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 68 deletions.
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
unsigned VScaleForTuning = 2;
TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;

/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
Expand Down Expand Up @@ -397,6 +398,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {

unsigned getVScaleForTuning() const { return VScaleForTuning; }

/// Returns the default SVE tail-folding options stored for this subtarget
/// (the DefaultSVETFOpts member).
TailFoldingOpts getSVETailFoldingDefaultOpts() const {
return DefaultSVETFOpts;
}

const char* getChkStkName() const {
if (isWindowsArm64EC())
return "__chkstk_arm64ec";
Expand Down
190 changes: 123 additions & 67 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,81 +40,136 @@ static cl::opt<unsigned> SVEScatterOverhead("sve-scatter-overhead",
cl::init(10), cl::Hidden);

namespace {
class TailFoldingKind {
private:
uint8_t Bits = 0; // Currently defaults to disabled.
class TailFoldingOption {
// These bitfields will only ever be set to something non-zero in operator=,
// when setting the -sve-tail-folding option. This option should always be of
// the form (default|simple|all|disabled)[+(Flag1|Flag2|etc)], where here
// InitialBits is one of (disabled|all|simple). EnableBits represents
// additional flags we're enabling, and DisableBits for those flags we're
// disabling. The default flag is tracked in the variable NeedsDefault, since
// at the time of setting the option we may not know what the default value
// for the CPU is.
TailFoldingOpts InitialBits = TailFoldingOpts::Disabled;
TailFoldingOpts EnableBits = TailFoldingOpts::Disabled;
TailFoldingOpts DisableBits = TailFoldingOpts::Disabled;

// This value needs to be initialised to true in case the user does not
// explicitly set the -sve-tail-folding option.
bool NeedsDefault = true;

// Records the starting set of options that the enable/disable flags are later
// applied on top of (used by getBits when NeedsDefault is false).
void setInitialBits(TailFoldingOpts Bits) { InitialBits = Bits; }

// Selects whether getBits should start from the caller-supplied default bits
// (true) or from InitialBits (false).
void setNeedsDefault(bool V) { NeedsDefault = V; }

// Requests that \p Bit be switched on, cancelling any earlier request to
// switch the same flag off so the two masks never overlap.
void setEnableBit(TailFoldingOpts Bit) {
  DisableBits &= ~Bit;
  EnableBits |= Bit;
}

// Requests that \p Bit be switched off, cancelling any earlier request to
// switch the same flag on so the two masks never overlap.
void setDisableBit(TailFoldingOpts Bit) {
  DisableBits |= Bit;
  EnableBits &= ~Bit;
}

// Computes the effective tail-folding options. The starting set is
// \p DefaultBits when the default was requested (or the option was never
// set), otherwise InitialBits; EnableBits are then added and DisableBits
// removed.
TailFoldingOpts getBits(TailFoldingOpts DefaultBits) const {
  assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
         "Initial bits should only include one of "
         "(disabled|all|simple|default)");

  TailFoldingOpts Effective = NeedsDefault ? DefaultBits : InitialBits;
  Effective |= EnableBits;
  Effective &= ~DisableBits;
  return Effective;
}

void reportError(std::string Opt) {
errs() << "invalid argument '" << Opt
<< "' to -sve-tail-folding=; the option should be of the form\n"
" (disabled|all|default|simple)[+(reductions|recurrences"
"|reverse|noreductions|norecurrences|noreverse)]\n";
report_fatal_error("Unrecognised tail-folding option");
}

public:
enum TailFoldingOpts {
TFDisabled = 0x0,
TFReductions = 0x01,
TFRecurrences = 0x02,
TFReverse = 0x04,
TFSimple = 0x80,
TFAll = TFReductions | TFRecurrences | TFReverse | TFSimple
};

// Parses the value given to -sve-tail-folding. The value is split on '+'; the
// first piece may be one of disabled|all|default|simple and selects the
// starting set, and every remaining piece must be one of the
// reductions/recurrences/reverse flags or its "no" counterpart. Anything else
// is reported through reportError.
void operator=(const std::string &Val) {
  // If the user explicitly sets -sve-tail-folding= then treat as an error.
  if (Val.empty()) {
    reportError("");
    return;
  }

  // Since the user is explicitly setting the option we don't automatically
  // need the default unless they require it.
  setNeedsDefault(false);

  SmallVector<StringRef, 4> TailFoldTypes;
  StringRef(Val).split(TailFoldTypes, '+', -1, false);

  // The split above discards empty pieces, so a value consisting only of '+'
  // characters yields no pieces at all. Reject it here rather than indexing
  // an empty vector below.
  if (TailFoldTypes.empty()) {
    reportError(Val);
    return;
  }

  unsigned StartIdx = 1;
  if (TailFoldTypes[0] == "disabled")
    setInitialBits(TailFoldingOpts::Disabled);
  else if (TailFoldTypes[0] == "all")
    setInitialBits(TailFoldingOpts::All);
  else if (TailFoldTypes[0] == "default")
    setNeedsDefault(true);
  else if (TailFoldTypes[0] == "simple")
    setInitialBits(TailFoldingOpts::Simple);
  else {
    // No leading keyword: start from "disabled" and treat the first piece as
    // an ordinary flag.
    StartIdx = 0;
    setInitialBits(TailFoldingOpts::Disabled);
  }

  for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
    if (TailFoldTypes[I] == "reductions")
      setEnableBit(TailFoldingOpts::Reductions);
    else if (TailFoldTypes[I] == "recurrences")
      setEnableBit(TailFoldingOpts::Recurrences);
    else if (TailFoldTypes[I] == "reverse")
      setEnableBit(TailFoldingOpts::Reverse);
    else if (TailFoldTypes[I] == "noreductions")
      setDisableBit(TailFoldingOpts::Reductions);
    else if (TailFoldTypes[I] == "norecurrences")
      setDisableBit(TailFoldingOpts::Recurrences);
    else if (TailFoldTypes[I] == "noreverse")
      setDisableBit(TailFoldingOpts::Reverse);
    else
      reportError(Val);
  }
}

void add(uint8_t Flag) { Bits |= Flag; }
void remove(uint8_t Flag) { Bits &= ~Flag; }
// Returns true when every bit in \p Required is present in the effective
// options computed by getBits(\p DefaultBits).
bool satisfies(TailFoldingOpts DefaultBits, TailFoldingOpts Required) const {
  const TailFoldingOpts Effective = getBits(DefaultBits);
  return (Effective & Required) == Required;
}
};
} // namespace

TailFoldingKind TailFoldingKindLoc;
TailFoldingOption TailFoldingOptionLoc;

cl::opt<TailFoldingKind, true, cl::parser<std::string>> SVETailFolding(
cl::opt<TailFoldingOption, true, cl::parser<std::string>> SVETailFolding(
"sve-tail-folding",
cl::desc(
"Control the use of vectorisation using tail-folding for SVE:"
"\ndisabled No loop types will vectorize using tail-folding"
"\ndefault Uses the default tail-folding settings for the target "
"CPU"
"\nall All legal loop types will vectorize using tail-folding"
"\nsimple Use tail-folding for simple loops (not reductions or "
"recurrences)"
"\nreductions Use tail-folding for loops containing reductions"
"\nrecurrences Use tail-folding for loops containing fixed order "
"Control the use of vectorisation using tail-folding for SVE where the"
" option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
"\ndisabled (Initial) No loop types will vectorize using "
"tail-folding"
"\ndefault (Initial) Uses the default tail-folding settings for "
"the target CPU"
"\nall (Initial) All legal loop types will vectorize using "
"tail-folding"
"\nsimple (Initial) Use tail-folding for simple loops (not "
"reductions or recurrences)"
"\nreductions Use tail-folding for loops containing reductions"
"\nnoreductions Inverse of above"
"\nrecurrences Use tail-folding for loops containing fixed order "
"recurrences"
"\nreverse Use tail-folding for loops requiring reversed "
"predicates"),
cl::location(TailFoldingKindLoc));
"\nnorecurrences Inverse of above"
"\nreverse Use tail-folding for loops requiring reversed "
"predicates"
"\nnoreverse Inverse of above"),
cl::location(TailFoldingOptionLoc));

// Experimental option that will only be fully functional when the
// code-generator is changed to use SVE instead of NEON for all fixed-width
Expand Down Expand Up @@ -3479,7 +3534,7 @@ static bool containsDecreasingPointers(Loop *TheLoop,
}

bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) {
if (!ST->hasSVE() || TailFoldingKindLoc == TailFoldingKind::TFDisabled)
if (!ST->hasSVE())
return false;

// We don't currently support vectorisation with interleaving for SVE - with
Expand All @@ -3488,22 +3543,23 @@ bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) {
if (TFI->IAI->hasGroups())
return false;

TailFoldingKind Required; // Defaults to 0.
TailFoldingOpts Required = TailFoldingOpts::Disabled;
if (TFI->LVL->getReductionVars().size())
Required.add(TailFoldingKind::TFReductions);
Required |= TailFoldingOpts::Reductions;
if (TFI->LVL->getFixedOrderRecurrences().size())
Required.add(TailFoldingKind::TFRecurrences);
Required |= TailFoldingOpts::Recurrences;

// We call this to discover whether any load/store pointers in the loop have
// negative strides. This will require extra work to reverse the loop
// predicate, which may be expensive.
if (containsDecreasingPointers(TFI->LVL->getLoop(),
TFI->LVL->getPredicatedScalarEvolution()))
Required.add(TailFoldingKind::TFReverse);
if (!Required)
Required.add(TailFoldingKind::TFSimple);
Required |= TailFoldingOpts::Reverse;
if (Required == TailFoldingOpts::Disabled)
Required |= TailFoldingOpts::Simple;

return (TailFoldingKindLoc & Required) == Required;
return TailFoldingOptionLoc.satisfies(ST->getSVETailFoldingDefaultOpts(),
Required);
}

InstructionCost
Expand Down
22 changes: 22 additions & 0 deletions llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
// FIXME: Is it easiest to fix this layering violation by moving the .inc
// #includes from AArch64MCTargetDesc.h to here?
#include "MCTargetDesc/AArch64MCTargetDesc.h" // For AArch64::X0 and friends.
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/SubtargetFeature.h"
Expand Down Expand Up @@ -529,6 +530,27 @@ getSVEPredPatternFromNumElements(unsigned MinNumElts) {
}
}

/// Bitmask describing which kinds of loop we should attempt to tail-fold:
///   Disabled:    No loops.
///   Simple:      Loops that are not reversed and contain neither reductions
///                nor fixed-order recurrences.
///   Reductions:  Loops containing reductions.
///   Recurrences: Loops containing fixed-order recurrences, i.e. that would
///                require an SVE splice instruction.
///   Reverse:     Reverse loops.
///   All:         Every kind listed above.
enum class TailFoldingOpts : uint8_t {
  Disabled = 0,
  Simple = 1 << 0,
  Reductions = 1 << 1,
  Recurrences = 1 << 2,
  Reverse = 1 << 3,
  All = Simple | Reductions | Recurrences | Reverse
};

LLVM_DECLARE_ENUM_AS_BITMASK(TailFoldingOpts,
/* LargestValue */ (long)TailFoldingOpts::Reverse);

namespace AArch64ExactFPImm {
struct ExactFPImm {
const char *Name;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=disabled -S | FileCheck %s -check-prefix=CHECK-NOTF
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=default -S | FileCheck %s -check-prefix=CHECK-NOTF
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all -S | FileCheck %s -check-prefix=CHECK-TF
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=disabled+simple+reductions+recurrences+reverse -S | FileCheck %s -check-prefix=CHECK-TF
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=simple+reductions+recurrences+reverse -S | FileCheck %s -check-prefix=CHECK-TF
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+noreductions -S | FileCheck %s -check-prefix=CHECK-TF-NORED
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+norecurrences -S | FileCheck %s -check-prefix=CHECK-TF-NOREC
; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+noreverse -S | FileCheck %s -check-prefix=CHECK-TF-NOREV
Expand Down

0 comments on commit 7beb2ca

Please sign in to comment.