Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -660,6 +660,9 @@
   /// Return true if the target supports masked expand load.
   bool isLegalMaskedExpandLoad(Type *DataType) const;
 
+  /// Return true if we should be enabling ordered reductions for the target.
+  bool enableOrderedReductions() const;
+
   /// Return true if the target has a unified operation to calculate division
   /// and remainder. If so, the additional implicit multiplication and
   /// subtraction required to calculate a remainder from division are free. This
@@ -1505,6 +1508,7 @@
   virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
   virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
   virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
+  virtual bool enableOrderedReductions() = 0;
   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
   virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
   virtual bool prefersVectorizedAddressing() = 0;
@@ -1886,6 +1890,9 @@
   bool isLegalMaskedExpandLoad(Type *DataType) override {
     return Impl.isLegalMaskedExpandLoad(DataType);
   }
+  bool enableOrderedReductions() override {
+    return Impl.enableOrderedReductions();
+  }
   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
     return Impl.hasDivRemOp(DataType, IsSigned);
   }
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -262,6 +262,8 @@
 
   bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
 
+  bool enableOrderedReductions() const { return false; }
+
   bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
 
   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -409,6 +409,10 @@
   return TTIImpl->isLegalMaskedExpandLoad(DataType);
 }
 
+bool TargetTransformInfo::enableOrderedReductions() const {
+  return TTIImpl->enableOrderedReductions();
+}
+
 bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
   return TTIImpl->hasDivRemOp(DataType, IsSigned);
 }
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -295,6 +295,8 @@
     return BaseT::isLegalNTStore(DataType, Alignment);
   }
 
+  bool enableOrderedReductions() const { return true; }
+
   InstructionCost getInterleavedMemoryOpCost(
       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
       Align Alignment, unsigned AddressSpace,
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -332,8 +332,8 @@
     cl::desc("Prefer in-loop vector reductions, "
              "overriding the targets preference."));
 
-cl::opt<bool> EnableStrictReductions(
-    "enable-strict-reductions", cl::init(false), cl::Hidden,
+static cl::opt<bool> EnableStrictReductions(
+    "enable-strict-reductions", cl::init(true), cl::Hidden,
     cl::desc("Enable the vectorisation of loops with in-order (strict) "
              "FP reductions"));
 
@@ -1319,8 +1319,7 @@
   /// the IsOrdered flag of RdxDesc is set and we do not allow reordering
   /// of FP operations.
   bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) {
-    return EnableStrictReductions && !Hints->allowReordering() &&
-           RdxDesc.isOrdered();
+    return !Hints->allowReordering() && RdxDesc.isOrdered();
   }
 
   /// \returns The smallest bitwidth each instruction can be represented with.
@@ -10097,7 +10096,8 @@
     return false;
   }
 
-  if (!LVL.canVectorizeFPMath(EnableStrictReductions)) {
+  if (!LVL.canVectorizeFPMath(EnableStrictReductions &&
+                              TTI->enableOrderedReductions())) {
     ORE->emit([&]() {
       auto *ExactFPMathInst = Requirements.getExactFPInst();
       return OptimizationRemarkAnalysisFPCommute(DEBUG_TYPE, "CantReorderFPOps",
Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -2,6 +2,7 @@
 ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -enable-strict-reductions=false -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
 ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -enable-strict-reductions=true -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
 ; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -enable-strict-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
+; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-ORDERED
 
 define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
 ; CHECK-ORDERED-LABEL: @fadd_strict