Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -128,6 +128,7 @@ unsigned MinSVEVectorSizeInBits; unsigned MaxSVEVectorSizeInBits; unsigned VScaleForTuning = 2; + uint8_t DefaultSVETFOpts = TFDisabled; /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; @@ -390,6 +391,8 @@ unsigned getVScaleForTuning() const { return VScaleForTuning; } + uint8_t getSVETailFoldingDefaultOpts() const { return DefaultSVETFOpts; } + const char* getChkStkName() const { if (isWindowsArm64EC()) return "__chkstk_arm64ec"; Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -40,81 +40,113 @@ cl::init(10), cl::Hidden); namespace { -class TailFoldingKind { +class TailFoldingOption { private: - uint8_t Bits = 0; // Currently defaults to disabled. + uint8_t InitialBits, EnableBits, DisableBits; + bool NeedsDefault; + + void setInitialBits(uint8_t Bits) { InitialBits = Bits; } + + void setNeedsDefault(void) { NeedsDefault = true; } + + void setEnableBit(uint8_t Bit) { + EnableBits |= Bit; + DisableBits &= ~Bit; + } + + void setDisableBit(uint8_t Bit) { + EnableBits &= ~Bit; + DisableBits |= Bit; + } + + uint8_t getBits(uint8_t DefaultBits) const { + uint8_t Bits = 0; + + assert((!InitialBits || !NeedsDefault) && + "Initial bits should only include one of " + "(disabled|all|simple|default)"); + Bits = NeedsDefault ? DefaultBits : InitialBits; + Bits |= EnableBits; + Bits &= ~DisableBits; + + return Bits; + } public: - enum TailFoldingOpts { - TFDisabled = 0x0, - TFReductions = 0x01, - TFRecurrences = 0x02, - TFReverse = 0x04, - TFSimple = 0x80, - TFAll = TFReductions | TFRecurrences | TFReverse | TFSimple - }; + TailFoldingOption() + : InitialBits(0), EnableBits(0), DisableBits(0), NeedsDefault(false) {} void operator=(const std::string &Val) { - if (Val.empty()) + if (Val.empty()) { + setNeedsDefault(); return; - SmallVector TailFoldTypes; + } + + SmallVector TailFoldTypes; StringRef(Val).split(TailFoldTypes, '+', -1, false); + int Count = 0; for (auto TailFoldType : TailFoldTypes) { - if (TailFoldType == "disabled") - Bits = 0; - else if (TailFoldType == "all") - Bits = TFAll; - else if (TailFoldType == "default") - Bits = 0; // Currently defaults to never tail-folding. - else if (TailFoldType == "simple") - add(TFSimple); + if (Count == 0 && TailFoldType == "disabled") + setInitialBits(0); + else if (Count == 0 && TailFoldType == "all") + setInitialBits(TFAll); + else if (Count == 0 && TailFoldType == "default") + setNeedsDefault(); + else if (Count == 0 && TailFoldType == "simple") + setInitialBits(TFSimple); else if (TailFoldType == "reductions") - add(TFReductions); + setEnableBit(TFReductions); else if (TailFoldType == "recurrences") - add(TFRecurrences); + setEnableBit(TFRecurrences); else if (TailFoldType == "reverse") - add(TFReverse); + setEnableBit(TFReverse); else if (TailFoldType == "noreductions") - remove(TFReductions); + setDisableBit(TFReductions); else if (TailFoldType == "norecurrences") - remove(TFRecurrences); + setDisableBit(TFRecurrences); else if (TailFoldType == "noreverse") - remove(TFReverse); + setDisableBit(TFReverse); else { - errs() - << "invalid argument " << TailFoldType.str() - << " to -sve-tail-folding=; each element must be one of: disabled, " - "all, default, simple, reductions, noreductions, recurrences, " - "norecurrences\n"; + errs() << "invalid argument '" << TailFoldType.str() + << "' to -sve-tail-folding=; the option should be of the form\n" + " (disabled|all|default|simple)[+(reductions|recurrences" + "|reverse|noreductions|norecurrences|noreverse)]\n"; + report_fatal_error("Unrecognised tail-folding option"); } + Count++; } } - operator uint8_t() const { return Bits; } - - void add(uint8_t Flag) { Bits |= Flag; } - void remove(uint8_t Flag) { Bits &= ~Flag; } + bool satisfies(uint8_t DefaultBits, uint8_t Required) const { + return (getBits(DefaultBits) & Required) == Required; + } }; } // namespace -TailFoldingKind TailFoldingKindLoc; +TailFoldingOption TailFoldingOptionLoc; -cl::opt> SVETailFolding( +cl::opt> SVETailFolding( "sve-tail-folding", cl::desc( - "Control the use of vectorisation using tail-folding for SVE:" - "\ndisabled No loop types will vectorize using tail-folding" - "\ndefault Uses the default tail-folding settings for the target " - "CPU" - "\nall All legal loop types will vectorize using tail-folding" - "\nsimple Use tail-folding for simple loops (not reductions or " - "recurrences)" - "\nreductions Use tail-folding for loops containing reductions" - "\nrecurrences Use tail-folding for loops containing fixed order " + "Control the use of vectorisation using tail-folding for SVE where the" + " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:" + "\ndisabled (Initial) No loop types will vectorize using " + "tail-folding" + "\ndefault (Initial) Uses the default tail-folding settings for " + "the target CPU" + "\nall (Initial) All legal loop types will vectorize using " + "tail-folding" + "\nsimple (Initial) Use tail-folding for simple loops (not " + "reductions or recurrences)" + "\nreductions Use tail-folding for loops containing reductions" + "\nnoreductions Inverse of above" + "\nrecurrences Use tail-folding for loops containing fixed order " "recurrences" - "\nreverse Use tail-folding for loops requiring reversed " - "predicates"), - cl::location(TailFoldingKindLoc)); + "\nnorecurrences Inverse of above" + "\nreverse Use tail-folding for loops requiring reversed " + "predicates" + "\nnoreverse Inverse of above"), + cl::location(TailFoldingOptionLoc)); // Experimental option that will only be fully functional when the // code-generator is changed to use SVE instead of NEON for all fixed-width @@ -3496,7 +3528,7 @@ } bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) { - if (!ST->hasSVE() || TailFoldingKindLoc == TailFoldingKind::TFDisabled) + if (!ST->hasSVE()) return false; // We don't currently support vectorisation with interleaving for SVE - with @@ -3505,22 +3537,23 @@ if (TFI->IAI->hasGroups()) return false; - TailFoldingKind Required; // Defaults to 0. + uint8_t Required = 0; if (TFI->LVL->getReductionVars().size()) - Required.add(TailFoldingKind::TFReductions); + Required |= TFReductions; if (TFI->LVL->getFixedOrderRecurrences().size()) - Required.add(TailFoldingKind::TFRecurrences); + Required |= TFRecurrences; // We call this to discover whether any load/store pointers in the loop have // negative strides. This will require extra work to reverse the loop // predicate, which may be expensive. if (containsDecreasingPointers(TFI->LVL->getLoop(), TFI->LVL->getPredicatedScalarEvolution())) - Required.add(TailFoldingKind::TFReverse); + Required |= TFReverse; if (!Required) - Required.add(TailFoldingKind::TFSimple); + Required |= TFSimple; - return (TailFoldingKindLoc & Required) == Required; + return TailFoldingOptionLoc.satisfies(ST->getSVETailFoldingDefaultOpts(), + Required); } InstructionCost Index: llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h =================================================================== --- llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -529,6 +529,24 @@ } } +/// An enum to describe what types of loops we should attempt to tail-fold: +/// TFDisabled: None +/// TFReductions: Loops containing reductions +/// TFRecurrences: Loops with first-order recurrences, i.e. that would +/// require a SVE splice instruction +/// TFReverse: Reverse loops +/// TFSimple: Loops that are not reversed and don't contain reductions +/// or first-order recurrences. +/// TFAll: All +enum TailFoldingOpts : uint8_t { + TFDisabled = 0x00, + TFSimple = 0x01, + TFReductions = 0x02, + TFRecurrences = 0x04, + TFReverse = 0x08, + TFAll = TFReductions | TFRecurrences | TFSimple | TFReverse +}; + namespace AArch64ExactFPImm { struct ExactFPImm { const char *Name; Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-option.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=disabled -S | FileCheck %s -check-prefix=CHECK-NOTF ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=default -S | FileCheck %s -check-prefix=CHECK-NOTF ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all -S | FileCheck %s -check-prefix=CHECK-TF -; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=disabled+simple+reductions+recurrences+reverse -S | FileCheck %s -check-prefix=CHECK-TF +; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=simple+reductions+recurrences+reverse -S | FileCheck %s -check-prefix=CHECK-TF ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+noreductions -S | FileCheck %s -check-prefix=CHECK-TF-NORED ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+norecurrences -S | FileCheck %s -check-prefix=CHECK-TF-NOREC ; RUN: opt < %s -passes=loop-vectorize -sve-tail-folding=all+noreverse -S | FileCheck %s -check-prefix=CHECK-TF-NOREV