Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -128,6 +128,7 @@ unsigned MinSVEVectorSizeInBits; unsigned MaxSVEVectorSizeInBits; unsigned VScaleForTuning = 2; + TailFoldingOpts::IntType DefaultSVETFOpts = TailFoldingOpts::Disabled; /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; @@ -390,6 +391,10 @@ unsigned getVScaleForTuning() const { return VScaleForTuning; } + TailFoldingOpts::IntType getSVETailFoldingDefaultOpts() const { + return DefaultSVETFOpts; + } + const char* getChkStkName() const { if (isWindowsArm64EC()) return "__chkstk_arm64ec"; Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -40,81 +40,134 @@ cl::init(10), cl::Hidden); namespace { -class TailFoldingKind { -private: - uint8_t Bits = 0; // Currently defaults to disabled. +class TailFoldingOption { + // These bitfields will only ever be set to something non-zero in operator=, + // when setting the -sve-tail-folding option. This option should always be of + // the form (default|simple|all|disable)[+(Flag1|Flag2|etc)], where here + // InitialBits is one of (disabled|all|simple). EnableBits represents + // additional flags we're enabling, and DisableBits for those flags we're + // disabling. The default flag is tracked in the variable NeedsDefault, since + // at the time of setting the option we may not know what the default value + // for the CPU is. + TailFoldingOpts::IntType InitialBits = 0; + TailFoldingOpts::IntType EnableBits = 0; + TailFoldingOpts::IntType DisableBits = 0; + + // This value needs to be initialised to true in case the user does not + // explicitly set the -sve-tail-folding option. + bool NeedsDefault = true; + + void setInitialBits(TailFoldingOpts::IntType Bits) { InitialBits = Bits; } + + void setNeedsDefault(bool V) { NeedsDefault = V; } + + void setEnableBit(TailFoldingOpts::IntType Bit) { + EnableBits |= Bit; + DisableBits &= ~Bit; + } + + void setDisableBit(TailFoldingOpts::IntType Bit) { + EnableBits &= ~Bit; + DisableBits |= Bit; + } + + TailFoldingOpts::IntType getBits(TailFoldingOpts::IntType DefaultBits) const { + TailFoldingOpts::IntType Bits = 0; + + assert((!InitialBits || !NeedsDefault) && + "Initial bits should only include one of " + "(disabled|all|simple|default)"); + Bits = NeedsDefault ? DefaultBits : InitialBits; + Bits |= EnableBits; + Bits &= ~DisableBits; + + return Bits; + } + + void reportError(std::string Opt) { + errs() << "invalid argument '" << Opt + << "' to -sve-tail-folding=; the option should be of the form\n" + " (disabled|all|default|simple)[+(reductions|recurrences" + "|reverse|noreductions|norecurrences|noreverse)]\n"; + report_fatal_error("Unrecognised tail-folding option"); + } public: - enum TailFoldingOpts { - TFDisabled = 0x0, - TFReductions = 0x01, - TFRecurrences = 0x02, - TFReverse = 0x04, - TFSimple = 0x80, - TFAll = TFReductions | TFRecurrences | TFReverse | TFSimple - }; void operator=(const std::string &Val) { - if (Val.empty()) + // If the user explicitly sets -sve-tail-folding= then treat as an error. + if (Val.empty()) { + reportError(""); return; - SmallVector TailFoldTypes; + } + + // Since the user is explicitly setting the option we don't automatically + // need the default unless they require it. + setNeedsDefault(false); + + SmallVector TailFoldTypes; StringRef(Val).split(TailFoldTypes, '+', -1, false); - for (auto TailFoldType : TailFoldTypes) { - if (TailFoldType == "disabled") - Bits = 0; - else if (TailFoldType == "all") - Bits = TFAll; - else if (TailFoldType == "default") - Bits = 0; // Currently defaults to never tail-folding. - else if (TailFoldType == "simple") - add(TFSimple); - else if (TailFoldType == "reductions") - add(TFReductions); - else if (TailFoldType == "recurrences") - add(TFRecurrences); - else if (TailFoldType == "reverse") - add(TFReverse); - else if (TailFoldType == "noreductions") - remove(TFReductions); - else if (TailFoldType == "norecurrences") - remove(TFRecurrences); - else if (TailFoldType == "noreverse") - remove(TFReverse); - else { - errs() - << "invalid argument " << TailFoldType.str() - << " to -sve-tail-folding=; each element must be one of: disabled, " - "all, default, simple, reductions, noreductions, recurrences, " - "norecurrences\n"; - } + + if (TailFoldTypes[0] == "disabled") + setInitialBits(0); + else if (TailFoldTypes[0] == "all") + setInitialBits(TailFoldingOpts::All); + else if (TailFoldTypes[0] == "default") + setNeedsDefault(true); + else if (TailFoldTypes[0] == "simple") + setInitialBits(TailFoldingOpts::Simple); + else + reportError(Val); + + for (unsigned i = 1; i < TailFoldTypes.size(); i++) { + if (TailFoldTypes[i] == "reductions") + setEnableBit(TailFoldingOpts::Reductions); + else if (TailFoldTypes[i] == "recurrences") + setEnableBit(TailFoldingOpts::Recurrences); + else if (TailFoldTypes[i] == "reverse") + setEnableBit(TailFoldingOpts::Reverse); + else if (TailFoldTypes[i] == "noreductions") + setDisableBit(TailFoldingOpts::Reductions); + else if (TailFoldTypes[i] == "norecurrences") + setDisableBit(TailFoldingOpts::Recurrences); + else if (TailFoldTypes[i] == "noreverse") + setDisableBit(TailFoldingOpts::Reverse); + else + reportError(Val); } } - operator uint8_t() const { return Bits; } - - void add(uint8_t Flag) { Bits |= Flag; } - void remove(uint8_t Flag) { Bits &= ~Flag; } + bool satisfies(TailFoldingOpts::IntType DefaultBits, + TailFoldingOpts::IntType Required) const { + return (getBits(DefaultBits) & Required) == Required; + } }; } // namespace -TailFoldingKind TailFoldingKindLoc; +TailFoldingOption TailFoldingOptionLoc; -cl::opt> SVETailFolding( +cl::opt> SVETailFolding( "sve-tail-folding", cl::desc( - "Control the use of vectorisation using tail-folding for SVE:" - "\ndisabled No loop types will vectorize using tail-folding" - "\ndefault Uses the default tail-folding settings for the target " - "CPU" - "\nall All legal loop types will vectorize using tail-folding" - "\nsimple Use tail-folding for simple loops (not reductions or " - "recurrences)" - "\nreductions Use tail-folding for loops containing reductions" - "\nrecurrences Use tail-folding for loops containing fixed order " + "Control the use of vectorisation using tail-folding for SVE where the" + " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:" + "\ndisabled (Initial) No loop types will vectorize using " + "tail-folding" + "\ndefault (Initial) Uses the default tail-folding settings for " + "the target CPU" + "\nall (Initial) All legal loop types will vectorize using " + "tail-folding" + "\nsimple (Initial) Use tail-folding for simple loops (not " + "reductions or recurrences)" + "\nreductions Use tail-folding for loops containing reductions" + "\nnoreductions Inverse of above" + "\nrecurrences Use tail-folding for loops containing fixed order " "recurrences" - "\nreverse Use tail-folding for loops requiring reversed " - "predicates"), - cl::location(TailFoldingKindLoc)); + "\nnorecurrences Inverse of above" + "\nreverse Use tail-folding for loops requiring reversed " + "predicates" + "\nnoreverse Inverse of above"), + cl::location(TailFoldingOptionLoc)); // Experimental option that will only be fully functional when the // code-generator is changed to use SVE instead of NEON for all fixed-width @@ -3496,7 +3549,7 @@ } bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) { - if (!ST->hasSVE() || TailFoldingKindLoc == TailFoldingKind::TFDisabled) + if (!ST->hasSVE()) return false; // We don't currently support vectorisation with interleaving for SVE - with @@ -3505,22 +3558,23 @@ if (TFI->IAI->hasGroups()) return false; - TailFoldingKind Required; // Defaults to 0. + TailFoldingOpts::IntType Required = 0; if (TFI->LVL->getReductionVars().size()) - Required.add(TailFoldingKind::TFReductions); + Required |= TailFoldingOpts::Reductions; if (TFI->LVL->getFixedOrderRecurrences().size()) - Required.add(TailFoldingKind::TFRecurrences); + Required |= TailFoldingOpts::Recurrences; // We call this to discover whether any load/store pointers in the loop have // negative strides. This will require extra work to reverse the loop // predicate, which may be expensive. if (containsDecreasingPointers(TFI->LVL->getLoop(), TFI->LVL->getPredicatedScalarEvolution())) - Required.add(TailFoldingKind::TFReverse); + Required |= TailFoldingOpts::Reverse; if (!Required) - Required.add(TailFoldingKind::TFSimple); + Required |= TailFoldingOpts::Simple; - return (TailFoldingKindLoc & Required) == Required; + return TailFoldingOptionLoc.satisfies(ST->getSVETailFoldingDefaultOpts(), + Required); } InstructionCost Index: llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h =================================================================== --- llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -529,6 +529,28 @@ } } +namespace TailFoldingOpts { +typedef uint8_t IntType; + +/// An enum to describe what types of loops we should attempt to tail-fold: +/// Disabled: None +/// Reductions: Loops containing reductions +/// Recurrences: Loops with first-order recurrences, i.e. that would +/// require a SVE splice instruction +/// Reverse: Reverse loops +/// Simple: Loops that are not reversed and don't contain reductions +/// or first-order recurrences. +/// All: All +enum TailFoldingOptsImpl : IntType { + Disabled = 0x00, + Simple = 0x01, + Reductions = 0x02, + Recurrences = 0x04, + Reverse = 0x08, + All = Reductions | Recurrences | Simple | Reverse +}; +} // namespace TailFoldingOpts + namespace AArch64ExactFPImm { struct ExactFPImm { const char *Name; Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll @@ -1,11 +1,11 @@ ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=disabled -S | FileCheck %s -check-prefix=CHECK-NOTF ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=default -S | FileCheck %s -check-prefix=CHECK-NOTF ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all -S | FileCheck %s -check-prefix=CHECK-TF -; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=disabled+simple+reductions+recurrences+reverse -S | FileCheck %s -check-prefix=CHECK-TF +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=simple+reductions+recurrences+reverse -S | FileCheck %s -check-prefix=CHECK-TF ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+noreductions -S | FileCheck %s -check-prefix=CHECK-TF-NORED ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+norecurrences -S | FileCheck %s -check-prefix=CHECK-TF-NOREC ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+noreverse -S | FileCheck %s -check-prefix=CHECK-TF-NOREV -; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=reductions -S | FileCheck %s -check-prefix=CHECK-TF-ONLYRED +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=disabled+reductions -S | FileCheck %s -check-prefix=CHECK-TF-ONLYRED target triple = "aarch64-unknown-linux-gnu"