Index: llvm/include/llvm/Analysis/IVDescriptors.h
===================================================================
--- llvm/include/llvm/Analysis/IVDescriptors.h
+++ llvm/include/llvm/Analysis/IVDescriptors.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Operator.h"
@@ -225,6 +226,10 @@
   SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi,
                                                     Loop *L) const;
 
+  /// Return the correct TargetTransformInfo::ReductionFlags for this reduction,
+  /// with NoNaN set as per the argument.
+  TargetTransformInfo::ReductionFlags getReductionFlags(bool NoNaN);
+
 private:
   // The starting value of the recurrence.
   // It does not have to be zero!
Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -335,22 +335,21 @@
 /// Create a target reduction of the given vector. The reduction operation
 /// is described by the \p Opcode parameter. min/max reductions require
 /// additional information supplied in \p Flags.
-/// The target is queried to determine if intrinsics or shuffle sequences are
-/// required to implement the reduction.
+/// \p UseReductionIntrinsic is used to determine if intrinsics or shuffle
+/// sequences are required to implement the reduction.
 /// Fast-math-flags are propagated using the IRBuilder's setting.
-Value *createSimpleTargetReduction(IRBuilderBase &B,
-                                   const TargetTransformInfo *TTI,
-                                   unsigned Opcode, Value *Src,
+Value *createSimpleTargetReduction(IRBuilderBase &B, unsigned Opcode,
+                                   Value *Src, bool UseReductionIntrinsic,
                                    TargetTransformInfo::ReductionFlags Flags =
                                        TargetTransformInfo::ReductionFlags(),
                                    ArrayRef<Value *> RedOps = None);
 
 /// Create a generic target reduction using a recurrence descriptor \p Desc
-/// The target is queried to determine if intrinsics or shuffle sequences are
-/// required to implement the reduction.
+/// \p UseReductionIntrinsic is used to determine if intrinsics or shuffle
+/// sequences are required to implement the reduction.
 /// Fast-math-flags are propagated using the RecurrenceDescriptor.
-Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
-                             RecurrenceDescriptor &Desc, Value *Src,
+Value *createTargetReduction(IRBuilderBase &B, RecurrenceDescriptor &Desc,
+                             Value *Src, bool UseReductionIntrinsic,
                              bool NoNaN = false);
 
 /// Get the intersection (logical and) of all of the potential IR flags
Index: llvm/lib/Analysis/IVDescriptors.cpp
===================================================================
--- llvm/lib/Analysis/IVDescriptors.cpp
+++ llvm/lib/Analysis/IVDescriptors.cpp
@@ -868,6 +868,30 @@
   return ReductionOperations;
 }
 
+TargetTransformInfo::ReductionFlags
+RecurrenceDescriptor::getReductionFlags(bool NoNaN) {
+  RecurrenceKind RecKind = getRecurrenceKind();
+  TargetTransformInfo::ReductionFlags Flags;
+  Flags.NoNaN = NoNaN;
+
+  switch (RecKind) {
+  case RK_IntegerMinMax: {
+    MinMaxRecurrenceKind MMKind = getMinMaxRecurrenceKind();
+    Flags.IsMaxOp = (MMKind == MRK_SIntMax || MMKind == MRK_UIntMax);
+    Flags.IsSigned = (MMKind == MRK_SIntMax || MMKind == MRK_SIntMin);
+    break;
+  }
+  case RK_FloatMinMax: {
+    Flags.IsMaxOp = getMinMaxRecurrenceKind() == MRK_FloatMax;
+    break;
+  }
+  default:
+    break;
+  }
+
+  return Flags;
+}
+
 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
                                          const SCEV *Step, BinaryOperator *BOp,
                                          SmallVectorImpl<Instruction *> *Casts)
Index: llvm/lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -954,10 +954,11 @@
 
 /// Create a simple vector reduction specified by an opcode and some
 /// flags (if generating min/max reductions).
-Value *llvm::createSimpleTargetReduction(
-    IRBuilderBase &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
-    Value *Src, TargetTransformInfo::ReductionFlags Flags,
-    ArrayRef<Value *> RedOps) {
+Value *
+llvm::createSimpleTargetReduction(IRBuilderBase &Builder, unsigned Opcode,
+                                  Value *Src, bool UseReductionIntrinsic,
+                                  TargetTransformInfo::ReductionFlags Flags,
+                                  ArrayRef<Value *> RedOps) {
   auto *SrcVTy = cast<VectorType>(Src->getType());
 
   std::function<Value *()> BuildFunc;
@@ -1020,22 +1021,19 @@
     llvm_unreachable("Unhandled opcode");
     break;
   }
-  if (ForceReductionIntrinsic ||
-      TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
+  if (ForceReductionIntrinsic || UseReductionIntrinsic)
     return BuildFunc();
   return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
 }
 
 /// Create a vector reduction using a given recurrence descriptor.
-Value *llvm::createTargetReduction(IRBuilderBase &B,
-                                   const TargetTransformInfo *TTI,
-                                   RecurrenceDescriptor &Desc, Value *Src,
+Value *llvm::createTargetReduction(IRBuilderBase &B, RecurrenceDescriptor &Desc,
+                                   Value *Src, bool UseReductionIntrinsic,
                                    bool NoNaN) {
   // TODO: Support in-order reductions based on the recurrence descriptor.
   using RD = RecurrenceDescriptor;
   RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
-  TargetTransformInfo::ReductionFlags Flags;
-  Flags.NoNaN = NoNaN;
+  TargetTransformInfo::ReductionFlags Flags = Desc.getReductionFlags(NoNaN);
 
   // All ops in the reduction inherit fast-math-flags from the recurrence
   // descriptor.
@@ -1044,28 +1042,33 @@
 
   switch (RecKind) {
   case RD::RK_FloatAdd:
-    return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
+    return createSimpleTargetReduction(B, Instruction::FAdd, Src,
+                                       UseReductionIntrinsic, Flags);
   case RD::RK_FloatMult:
-    return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
+    return createSimpleTargetReduction(B, Instruction::FMul, Src,
+                                       UseReductionIntrinsic, Flags);
   case RD::RK_IntegerAdd:
-    return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
+    return createSimpleTargetReduction(B, Instruction::Add, Src,
+                                       UseReductionIntrinsic, Flags);
   case RD::RK_IntegerMult:
-    return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
+    return createSimpleTargetReduction(B, Instruction::Mul, Src,
+                                       UseReductionIntrinsic, Flags);
   case RD::RK_IntegerAnd:
-    return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
+    return createSimpleTargetReduction(B, Instruction::And, Src,
+                                       UseReductionIntrinsic, Flags);
   case RD::RK_IntegerOr:
-    return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
+    return createSimpleTargetReduction(B, Instruction::Or, Src,
+                                       UseReductionIntrinsic, Flags);
   case RD::RK_IntegerXor:
-    return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
+    return createSimpleTargetReduction(B, Instruction::Xor, Src,
+                                       UseReductionIntrinsic, Flags);
   case RD::RK_IntegerMinMax: {
-    RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
-    Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
-    Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
-    return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
+    return createSimpleTargetReduction(B, Instruction::ICmp, Src,
+                                       UseReductionIntrinsic, Flags);
   }
   case RD::RK_FloatMinMax: {
-    Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
-    return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
+    return createSimpleTargetReduction(B, Instruction::FCmp, Src,
+                                       UseReductionIntrinsic, Flags);
   }
   default:
     llvm_unreachable("Unhandled RecKind");
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4011,7 +4011,8 @@
       if (PreferPredicatedReductionSelect ||
           TTI->preferPredicatedReductionSelect(
               RdxDesc.getRecurrenceBinOp(RdxDesc.getRecurrenceKind()),
-              Phi->getType(), TargetTransformInfo::ReductionFlags())) {
+              Phi->getType(),
+              RdxDesc.getReductionFlags(Legal->hasFunNoNaNAttr()))) {
         auto *VecRdxPhi = cast<PHINode>(getOrCreateVectorValue(Phi, Part));
         VecRdxPhi->setIncomingValueForBlock(
             LI->getLoopFor(LoopVectorBody)->getLoopLatch(), Sel);
@@ -4079,8 +4080,12 @@
   // target reduction in the loop using a Reduction recipe.
   if (VF.isVector() && !IsInLoopReductionPhi) {
     bool NoNaN = Legal->hasFunNoNaNAttr();
-    ReducedPartRdx =
-        createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, NoNaN);
+    bool UseReductionIntrinsic = TTI->useReductionIntrinsic(
+        RdxDesc.getRecurrenceBinOp(RdxDesc.getRecurrenceKind()),
+        ReducedPartRdx->getType(),
+        RdxDesc.getReductionFlags(NoNaN));
+    ReducedPartRdx = createTargetReduction(Builder, RdxDesc, ReducedPartRdx,
+                                           UseReductionIntrinsic, NoNaN);
     // If the reduction can be performed in a smaller type, we need to extend
     // the reduction to the wider type before we branch to the original loop.
     if (Phi->getType() != RdxDesc.getRecurrenceType())
@@ -6873,8 +6878,9 @@
     // want to record it as such.
     unsigned Opcode = RdxDesc.getRecurrenceBinOp(RdxDesc.getRecurrenceKind());
     if (!PreferInLoopReductions &&
-        !TTI.preferInLoopReduction(Opcode, Phi->getType(),
-                                   TargetTransformInfo::ReductionFlags()))
+        !TTI.preferInLoopReduction(
+            Opcode, Phi->getType(),
+            RdxDesc.getReductionFlags(Legal->hasFunNoNaNAttr())))
       continue;
 
     // Check that we can correctly put the reductions into the loop, by
@@ -7880,8 +7886,12 @@
           R->getOperand(FirstOpId) == Chain ? FirstOpId + 1 : FirstOpId;
       VPValue *VecOp = Plan->getVPValue(R->getOperand(VecOpId));
 
+      bool UseReductionIntrinsic = TTI->useReductionIntrinsic(
+          RdxDesc.getRecurrenceBinOp(Kind), RdxDesc.getRecurrenceType(),
+          RdxDesc.getReductionFlags(Legal->hasFunNoNaNAttr()));
       VPReductionRecipe *RedRecipe = new VPReductionRecipe(
-          &RdxDesc, R, ChainOp, VecOp, Legal->hasFunNoNaNAttr(), TTI);
+          &RdxDesc, R, ChainOp, VecOp, Legal->hasFunNoNaNAttr(),
+          UseReductionIntrinsic);
       WidenRecipe->getParent()->insert(RedRecipe, WidenRecipe->getIterator());
       WidenRecipe->eraseFromParent();
 
@@ -7999,8 +8009,8 @@
   for (unsigned Part = 0; Part < State.UF; ++Part) {
     unsigned Kind = RdxDesc->getRecurrenceKind();
     Value *NewVecOp = State.get(VecOp, Part);
-    Value *NewRed =
-        createTargetReduction(State.Builder, TTI, *RdxDesc, NewVecOp, NoNaN);
+    Value *NewRed = createTargetReduction(State.Builder, *RdxDesc, NewVecOp,
+                                          UseReductionIntrinsic, NoNaN);
     Value *PrevInChain = State.get(ChainOp, Part);
     Value *NextInChain;
     if (Kind == RecurrenceDescriptor::RK_IntegerMinMax ||
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7074,9 +7074,12 @@
       // FIXME: The builder should use an FMF guard. It should not be hard-coded
       //        to 'fast'.
       assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF");
-      return createSimpleTargetReduction(
-          Builder, TTI, ReductionData.getOpcode(), VectorizedValue,
-          ReductionData.getFlags(), ReductionOps.back());
+      TargetTransformInfo::ReductionFlags Flags = ReductionData.getFlags();
+      bool UseReductionIntrinsic = TTI->useReductionIntrinsic(
+          ReductionData.getOpcode(), VectorizedValue->getType(), Flags);
+      return createSimpleTargetReduction(Builder, ReductionData.getOpcode(),
+                                         VectorizedValue, UseReductionIntrinsic,
+                                         Flags, ReductionOps.back());
     }
 
     Value *TmpVec = VectorizedValue;
Index: llvm/lib/Transforms/Vectorize/VPlan.h
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlan.h
+++ llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1051,14 +1051,15 @@
   VPValue *ChainOp;
   /// Fast math flags to use for the resulting reduction operation.
   bool NoNaN;
-  /// Pointer to the TTI, needed to create the target reduction
-  const TargetTransformInfo *TTI;
+  /// Flag for whether to use reduction intrinsics vs shuffle expansions.
+  bool UseReductionIntrinsic;
 
 public:
   VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp,
-                    VPValue *VecOp, bool NoNaN, const TargetTransformInfo *TTI)
+                    VPValue *VecOp, bool NoNaN, bool UseReductionIntrinsic)
       : VPRecipeBase(VPReductionSC), RdxDesc(R), I(I), VecOp(VecOp),
-        ChainOp(ChainOp), NoNaN(NoNaN), TTI(TTI) {}
+        ChainOp(ChainOp), NoNaN(NoNaN),
+        UseReductionIntrinsic(UseReductionIntrinsic) {}
 
   ~VPReductionRecipe() override = default;
 