Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -128,6 +128,7 @@ unsigned MinSVEVectorSizeInBits; unsigned MaxSVEVectorSizeInBits; unsigned VScaleForTuning = 2; + uint8_t DefaultSVETFOpts = TFDisabled; /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; @@ -390,6 +391,8 @@ unsigned getVScaleForTuning() const { return VScaleForTuning; } + uint8_t getSVETailFoldingDefaultOpts() const { return DefaultSVETFOpts; } + const char* getChkStkName() const { if (isWindowsArm64EC()) return "__chkstk_arm64ec"; Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -40,66 +40,76 @@ cl::init(10), cl::Hidden); namespace { -class TailFoldingKind { +class TailFoldingOption { private: - uint8_t Bits = 0; // Currently defaults to disabled. + std::string UnparsedOptionString; + SmallVector TailFoldTypes; -public: - enum TailFoldingOpts { - TFDisabled = 0x0, - TFReductions = 0x01, - TFRecurrences = 0x02, - TFReverse = 0x04, - TFSimple = 0x80, - TFAll = TFReductions | TFRecurrences | TFReverse | TFSimple - }; + uint8_t getBits(uint8_t DefaultBits) const { + if (!TailFoldTypes.size()) + return DefaultBits; - void operator=(const std::string &Val) { - if (Val.empty()) - return; - SmallVector TailFoldTypes; - StringRef(Val).split(TailFoldTypes, '+', -1, false); + uint8_t Bits = 0; for (auto TailFoldType : TailFoldTypes) { if (TailFoldType == "disabled") - Bits = 0; + Bits &= ~TFAll; else if (TailFoldType == "all") - Bits = TFAll; + Bits |= TFAll; else if (TailFoldType == "default") - Bits = 0; // Currently defaults to never tail-folding. 
+ Bits |= DefaultBits; else if (TailFoldType == "simple") - add(TFSimple); + Bits |= TFSimple; else if (TailFoldType == "reductions") - add(TFReductions); + Bits |= TFReductions; else if (TailFoldType == "recurrences") - add(TFRecurrences); + Bits |= TFRecurrences; else if (TailFoldType == "reverse") - add(TFReverse); + Bits |= TFReverse; else if (TailFoldType == "noreductions") - remove(TFReductions); + Bits &= ~TFReductions; else if (TailFoldType == "norecurrences") - remove(TFRecurrences); + Bits &= ~TFRecurrences; else if (TailFoldType == "noreverse") - remove(TFReverse); - else { + Bits &= ~TFReverse; + else + llvm_unreachable("Unrecognised tail-folding options!"); + } + + return Bits; + } + +public: + void operator=(const std::string &Val) { + if (Val.empty()) + return; + + UnparsedOptionString = Val; + StringRef(UnparsedOptionString).split(TailFoldTypes, '+', -1, false); + for (auto TailFoldType : TailFoldTypes) { + if (TailFoldType != "disabled" && TailFoldType != "all" && + TailFoldType != "default" && TailFoldType != "simple" && + TailFoldType != "reductions" && TailFoldType != "recurrences" && + TailFoldType != "reverse" && TailFoldType != "noreductions" && + TailFoldType != "norecurrences" && TailFoldType != "noreverse") { errs() - << "invalid argument " << TailFoldType.str() - << " to -sve-tail-folding=; each element must be one of: disabled, " - "all, default, simple, reductions, noreductions, recurrences, " - "norecurrences\n"; + << "invalid argument '" << TailFoldType.str() + << "' to -sve-tail-folding=; each element must be one of: disabled" + ", all, default, simple, reductions, noreductions, recurrences" + ", norecurrences, reverse, noreverse\n"; + report_fatal_error("Unrecognised tail-folding option"); } } } - operator uint8_t() const { return Bits; } - - void add(uint8_t Flag) { Bits |= Flag; } - void remove(uint8_t Flag) { Bits &= ~Flag; } + bool satisfies(uint8_t DefaultBits, uint8_t Required) const { + return (getBits(DefaultBits) & 
Required) == Required; + } }; } // namespace -TailFoldingKind TailFoldingKindLoc; +TailFoldingOption TailFoldingOptionLoc; -cl::opt> SVETailFolding( +cl::opt> SVETailFolding( "sve-tail-folding", cl::desc( "Control the use of vectorisation using tail-folding for SVE:" @@ -114,7 +124,7 @@ "recurrences" "\nreverse Use tail-folding for loops requiring reversed " "predicates"), - cl::location(TailFoldingKindLoc)); + cl::location(TailFoldingOptionLoc)); // Experimental option that will only be fully functional when the // code-generator is changed to use SVE instead of NEON for all fixed-width @@ -3496,7 +3506,7 @@ } bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) { - if (!ST->hasSVE() || TailFoldingKindLoc == TailFoldingKind::TFDisabled) + if (!ST->hasSVE()) return false; // We don't currently support vectorisation with interleaving for SVE - with @@ -3505,22 +3515,23 @@ if (TFI->IAI->hasGroups()) return false; - TailFoldingKind Required; // Defaults to 0. + uint8_t Required = 0; if (TFI->LVL->getReductionVars().size()) - Required.add(TailFoldingKind::TFReductions); + Required |= TFReductions; if (TFI->LVL->getFixedOrderRecurrences().size()) - Required.add(TailFoldingKind::TFRecurrences); + Required |= TFRecurrences; // We call this to discover whether any load/store pointers in the loop have // negative strides. This will require extra work to reverse the loop // predicate, which may be expensive. 
if (containsDecreasingPointers(TFI->LVL->getLoop(), TFI->LVL->getPredicatedScalarEvolution())) - Required.add(TailFoldingKind::TFReverse); + Required |= TFReverse; if (!Required) - Required.add(TailFoldingKind::TFSimple); + Required |= TFSimple; - return (TailFoldingKindLoc & Required) == Required; + return TailFoldingOptionLoc.satisfies(ST->getSVETailFoldingDefaultOpts(), + Required); } InstructionCost Index: llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h =================================================================== --- llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -529,6 +529,24 @@ } } +/// An enum to describe what types of loops we should attempt to tail-fold: +/// TFDisabled: None +/// TFReductions: Loops containing reductions +/// TFRecurrences: Loops with first-order recurrences, i.e. that would +/// require an SVE splice instruction +/// TFReverse: Reverse loops +/// TFSimple: Loops that are not reversed and don't contain reductions +/// or first-order recurrences. +/// TFAll: All +enum TailFoldingOpts : uint8_t { + TFDisabled = 0x00, + TFSimple = 0x01, + TFReductions = 0x02, + TFRecurrences = 0x04, + TFReverse = 0x08, + TFAll = TFReductions | TFRecurrences | TFSimple | TFReverse +}; + namespace AArch64ExactFPImm { struct ExactFPImm { const char *Name;