Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -660,6 +660,9 @@ /// Return true if the target supports masked expand load. bool isLegalMaskedExpandLoad(Type *DataType) const; + /// Return true if we should be enabling ordered reductions for the target. + bool enableOrderedReductions() const; + /// Return true if the target has a unified operation to calculate division /// and remainder. If so, the additional implicit multiplication and /// subtraction required to calculate a remainder from division are free. This @@ -1505,6 +1508,7 @@ virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0; virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0; + virtual bool enableOrderedReductions() = 0; virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; virtual bool prefersVectorizedAddressing() = 0; @@ -1886,6 +1890,9 @@ bool isLegalMaskedExpandLoad(Type *DataType) override { return Impl.isLegalMaskedExpandLoad(DataType); } + bool enableOrderedReductions() override { + return Impl.enableOrderedReductions(); + } bool hasDivRemOp(Type *DataType, bool IsSigned) override { return Impl.hasDivRemOp(DataType, IsSigned); } Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -262,6 +262,8 @@ bool isLegalMaskedExpandLoad(Type *DataType) const { return false; } + bool enableOrderedReductions() const { return false; } + bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; } bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const { Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -409,6 +409,10 @@ return TTIImpl->isLegalMaskedExpandLoad(DataType); } +bool TargetTransformInfo::enableOrderedReductions() const { + return TTIImpl->enableOrderedReductions(); +} + bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const { return TTIImpl->hasDivRemOp(DataType, IsSigned); } Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -295,6 +295,8 @@ return BaseT::isLegalNTStore(DataType, Alignment); } + bool enableOrderedReductions() const { true; } + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -332,7 +332,7 @@ cl::desc("Prefer in-loop vector reductions, " "overriding the targets preference.")); -cl::opt ForceOrderedReductions( +static cl::opt ForceOrderedReductions( "force-ordered-reductions", cl::init(false), cl::Hidden, cl::desc("Enable the vectorisation of loops with in-order (strict) " "FP reductions")); @@ -1319,8 +1319,7 @@ /// the IsOrdered flag of RdxDesc is set and we do not allow reordering /// of FP operations. bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) { - return ForceOrderedReductions && !Hints->allowReordering() && - RdxDesc.isOrdered(); + return !Hints->allowReordering() && RdxDesc.isOrdered(); } /// \returns The smallest bitwidth each instruction can be represented with. @@ -7198,8 +7197,9 @@ // If we're using ordered reductions then we can just return the base cost // here, since getArithmeticReductionCost calculates the full ordered // reduction cost when FP reassociation is not allowed. - if (useOrderedReductions(RdxDesc)) + if (useOrderedReductions(RdxDesc)) { return BaseCost; + } // Get the operand that was not the reduction chain and match it to one of the // patterns, returning the better cost if it is found. @@ -10103,7 +10103,13 @@ return false; } - if (!LVL.canVectorizeFPMath(ForceOrderedReductions)) { + bool AllowOrderedReductions; + // If the flag is set, use that instead and override the TTI behaviour. + if (ForceOrderedReductions.getNumOccurrences() > 0) + AllowOrderedReductions = ForceOrderedReductions; + else + AllowOrderedReductions = TTI->enableOrderedReductions(); + if (!LVL.canVectorizeFPMath(AllowOrderedReductions)) { ORE->emit([&]() { auto *ExactFPMathInst = Requirements.getExactFPInst(); return OptimizationRemarkAnalysisFPCommute(DEBUG_TYPE, "CantReorderFPOps", Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -2,6 +2,7 @@ ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED ; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED +; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) { ; CHECK-ORDERED-LABEL: @fadd_strict Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll @@ -2,6 +2,7 @@ ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=false -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED +; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) { ; CHECK-ORDERED-LABEL: @fadd_strict