Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -299,10 +299,10 @@ ArrayRef<Value *> RedOps = None); /// Generates a vector reduction using shufflevectors to reduce the value. +/// Fast-math-flags are propagated using the IRBuilder's setting. Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind = RecurrenceDescriptor::MRK_Invalid, - FastMathFlags FMF = FastMathFlags(), ArrayRef<Value *> RedOps = None); /// Create a target reduction of the given vector. The reduction operation @@ -310,17 +310,18 @@ /// additional information supplied in \p Flags. /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. +/// Fast-math-flags are propagated using the IRBuilder's setting. Value *createSimpleTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, unsigned Opcode, Value *Src, TargetTransformInfo::ReductionFlags Flags = TargetTransformInfo::ReductionFlags(), - FastMathFlags FMF = FastMathFlags(), ArrayRef<Value *> RedOps = None); /// Create a generic target reduction using a recurrence descriptor \p Desc /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. +/// Fast-math-flags are propagated using the RecurrenceDescriptor. Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, RecurrenceDescriptor &Desc, Value *Src, bool NoNaN = false); Index: llvm/lib/CodeGen/ExpandReductions.cpp =================================================================== --- llvm/lib/CodeGen/ExpandReductions.cpp +++ llvm/lib/CodeGen/ExpandReductions.cpp @@ -118,11 +118,14 @@ } if (!TTI->shouldExpandReduction(II)) continue; + // Propagate FMF using the builder. FastMathFlags FMF = isa<FPMathOperator>(II) ? 
II->getFastMathFlags() : FastMathFlags{}; + IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(FMF); Value *Rdx = IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK) - : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK, FMF); + : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); II->replaceAllUsesWith(Rdx); II->eraseFromParent(); Changed = true; Index: llvm/lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUtils.cpp +++ llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -675,12 +675,6 @@ return true; } -static Value *addFastMathFlag(Value *V, FastMathFlags FMF) { - if (isa<FPMathOperator>(V)) - cast<Instruction>(V)->setFastMathFlags(FMF); - return V; -} - Value *llvm::createMinMaxOp(IRBuilder<> &Builder, RecurrenceDescriptor::MinMaxRecurrenceKind RK, Value *Left, Value *Right) { @@ -761,7 +755,7 @@ Value * llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, - FastMathFlags FMF, ArrayRef<Value *> RedOps) { + ArrayRef<Value *> RedOps) { unsigned VF = Src->getType()->getVectorNumElements(); // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles // and vector ops, reducing the set of values being computed by half each @@ -784,10 +778,9 @@ ConstantVector::get(ShuffleMask), "rdx.shuf"); if (Op != Instruction::ICmp && Op != Instruction::FCmp) { - // Floating point operations had to be 'fast' to enable the reduction. - TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op, - TmpVec, Shuf, "bin.rdx"), - FMF); + // The builder propagates its fast-math-flags setting. + TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, + "bin.rdx"); } else { assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && "Invalid min/max"); @@ -804,7 +797,7 @@ /// flags (if generating min/max reductions). 
Value *llvm::createSimpleTargetReduction( IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode, - Value *Src, TargetTransformInfo::ReductionFlags Flags, FastMathFlags FMF, + Value *Src, TargetTransformInfo::ReductionFlags Flags, ArrayRef<Value *> RedOps) { assert(isa<VectorType>(Src->getType()) && "Type must be a vector"); @@ -874,7 +867,7 @@ } if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags)) return BuildFunc(); - return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, FMF, RedOps); + return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps); } /// Create a vector reduction using a given recurrence descriptor. @@ -887,39 +880,36 @@ RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); TargetTransformInfo::ReductionFlags Flags; Flags.NoNaN = NoNaN; + + // All ops in the reduction inherit fast-math-flags from the recurrence + // descriptor. + IRBuilder<>::FastMathFlagGuard FMFGuard(B); + B.setFastMathFlags(Desc.getFastMathFlags()); + switch (RecKind) { case RD::RK_FloatAdd: - return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); case RD::RK_FloatMult: - return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); case RD::RK_IntegerAdd: - return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); case RD::RK_IntegerMult: - return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); case RD::RK_IntegerAnd: - return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::And, 
Src, Flags); case RD::RK_IntegerOr: - return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); case RD::RK_IntegerXor: - return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); case RD::RK_IntegerMinMax: { RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); - return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); } case RD::RK_FloatMinMax: { Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; - return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags, - Desc.getFastMathFlags()); + return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); } default: llvm_unreachable("Unhandled RecKind"); Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6294,11 +6294,15 @@ assert(isPowerOf2_32(ReduxWidth) && "We only handle power-of-two reductions for now"); - if (!IsPairwiseReduction) + if (!IsPairwiseReduction) { + // FIXME: The builder should already have an FMF guard, and this should + // not be hard-coded to 'fast'. 
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(FastMathFlags::getFast()); return createSimpleTargetReduction( Builder, TTI, ReductionData.getOpcode(), VectorizedValue, - ReductionData.getFlags(), FastMathFlags::getFast(), - ReductionOps.back()); + ReductionData.getFlags(), ReductionOps.back()); + } Value *TmpVec = VectorizedValue; for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {