Index: llvm/include/llvm/Analysis/IVDescriptors.h =================================================================== --- llvm/include/llvm/Analysis/IVDescriptors.h +++ llvm/include/llvm/Analysis/IVDescriptors.h @@ -78,12 +78,14 @@ FastMathFlags FMF, Instruction *ExactFP, Type *RT, bool Signed, bool Ordered, SmallPtrSetImpl &CI, + SmallVectorImpl &RdxChain, unsigned MinWidthCastToRecurTy) : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed), IsOrdered(Ordered), MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) { CastInsts.insert(CI.begin(), CI.end()); + ReductionChain.assign(RdxChain); } /// This POD struct holds information about a potential recurrence operation. @@ -253,6 +255,12 @@ /// recurrence. const SmallPtrSet &getCastInsts() const { return CastInsts; } + /// If found, returns the chain of operations from Phi to LoopExitInst that + /// can be treated as a set of reduction instructions for in-loop reductions. + const SmallVector &getReductionChain() const { + return ReductionChain; + } + /// Returns the minimum width used by the recurrence in bits. unsigned getMinWidthCastToRecurrenceTypeInBits() const { return MinWidthCastToRecurrenceType; @@ -264,11 +272,6 @@ /// Expose an ordered FP reduction to the instance users. bool isOrdered() const { return IsOrdered; } - /// Attempts to find a chain of operations from Phi to LoopExitInst that can - /// be treated as a set of reductions instructions for in-loop reductions. - SmallVector getReductionOpChain(PHINode *Phi, - Loop *L) const; - /// Returns true if the instruction is a call to the llvm.fmuladd intrinsic. static bool isFMulAddIntrinsic(Instruction *I) { return isa(I) && @@ -298,6 +301,8 @@ bool IsOrdered = false; // Instructions used for type-promoting the recurrence. SmallPtrSet CastInsts; + // The chain of operations between the reduction Phi and the exit instruction. + SmallVector ReductionChain; // The minimum width used by the recurrence. unsigned MinWidthCastToRecurrenceType; }; Index: llvm/lib/Analysis/IVDescriptors.cpp =================================================================== --- llvm/lib/Analysis/IVDescriptors.cpp +++ llvm/lib/Analysis/IVDescriptors.cpp @@ -203,6 +203,81 @@ } } +SmallVector getReductionOpChain(PHINode *Phi, Loop *L, + Instruction *LoopExitInstr, + unsigned RedOp) { + SmallVector ReductionOperations; + + // Search down from the Phi to the LoopExitInstr, looking for instructions + // with a single user of the correct type for the reduction. + + // Note that we check that the type of the operand is correct for each item in + // the chain, including the last (the loop exit value). This can come up from + // sub, which would otherwise be treated as an add reduction. MinMax also need + // to check for a pair of icmp/select, for which we use getNextInstruction and + // isCorrectOpcode functions to step the right number of instruction, and + // check the icmp/select pair. + // FIXME: We also do not attempt to look through Phi/Select's yet, which might + // be part of the reduction chain, or attempt to looks through And's to find a + // smaller bitwidth. Subs are also currently not allowed (which are usually + // treated as part of a add reduction) as they are expected to generally be + // more expensive than out-of-loop reductions, and need to be costed more + // carefully. + unsigned ExpectedUses = 1; + if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) + ExpectedUses = 2; + + auto getNextInstruction = [&](Instruction *Cur) { + if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) { + // We are expecting a icmp/select pair, which we go to the next select + // instruction if we can. We already know that Cur has 2 uses. + if (isa(*Cur->user_begin())) + return cast(*Cur->user_begin()); + else + return cast(*std::next(Cur->user_begin())); + } + return cast(*Cur->user_begin()); + }; + auto isCorrectOpcode = [&](Instruction *Cur) { + if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) { + Value *LHS, *RHS; + return SelectPatternResult::isMinOrMax( + matchSelectPattern(Cur, LHS, RHS).Flavor); + } + // Recognize a call to the llvm.fmuladd intrinsic. + if (isa(Cur) && + cast(Cur)->getIntrinsicID() == Intrinsic::fmuladd) + return true; + + return Cur->getOpcode() == RedOp; + }; + + // The loop exit instruction we check first (as a quick test) but add last. We + // check the opcode is correct (and dont allow them to be Subs) and that they + // have expected to have the expected number of uses. They will have one use + // from the phi and one from a LCSSA value, no matter the type. + if (!isCorrectOpcode(LoopExitInstr)) + return {}; + + // Check that the Phi has one (or two for min/max) uses. + if (!Phi->hasNUses(ExpectedUses)) + return {}; + Instruction *Cur = getNextInstruction(Phi); + + // Each other instruction in the chain should have the expected number of uses + // and be the correct opcode. + while (Cur != LoopExitInstr) { + if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses)) + return {}; + + ReductionOperations.push_back(Cur); + Cur = getNextInstruction(Cur); + } + + ReductionOperations.push_back(Cur); + return ReductionOperations; +} + // Check if a given Phi node can be recognized as an ordered reduction for // vectorizing floating point operations without unsafe math. static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, @@ -218,8 +293,7 @@ !RecurrenceDescriptor::isFMulAddIntrinsic(Exit)) return false; - // Ensure the exit instruction has only one user other than the reduction PHI - if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3)) + if (Exit != ExactFPMathInst) return false; // The only pattern accepted is the one in which the reduction PHI @@ -480,8 +554,14 @@ if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction) return false; - const bool IsOrdered = checkOrderedReduction( - Kind, ReduxDesc.getExactFPMathInst(), ExitInstruction, Phi); + SmallVector ReductionChain; + ReductionChain = + getReductionOpChain(Phi, TheLoop, ExitInstruction, getOpcode(Kind)); + + bool IsOrdered = false; + if (!ReductionChain.empty()) + IsOrdered = checkOrderedReduction(Kind, ReduxDesc.getExactFPMathInst(), + ExitInstruction, Phi); if (Start != Phi) { // If the starting value is not the same as the phi node, we speculatively @@ -540,7 +620,7 @@ // Save the description of this reduction variable. RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ReduxDesc.getExactFPMathInst(), RecurrenceType, - IsSigned, IsOrdered, CastInsts, + IsSigned, IsOrdered, CastInsts, ReductionChain, MinWidthCastToRecurrenceType); RedDes = RD; @@ -1032,80 +1112,6 @@ } } -SmallVector -RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const { - SmallVector ReductionOperations; - unsigned RedOp = getOpcode(Kind); - - // Search down from the Phi to the LoopExitInstr, looking for instructions - // with a single user of the correct type for the reduction. - - // Note that we check that the type of the operand is correct for each item in - // the chain, including the last (the loop exit value). This can come up from - // sub, which would otherwise be treated as an add reduction. MinMax also need - // to check for a pair of icmp/select, for which we use getNextInstruction and - // isCorrectOpcode functions to step the right number of instruction, and - // check the icmp/select pair. - // FIXME: We also do not attempt to look through Phi/Select's yet, which might - // be part of the reduction chain, or attempt to looks through And's to find a - // smaller bitwidth. Subs are also currently not allowed (which are usually - // treated as part of a add reduction) as they are expected to generally be - // more expensive than out-of-loop reductions, and need to be costed more - // carefully. - unsigned ExpectedUses = 1; - if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) - ExpectedUses = 2; - - auto getNextInstruction = [&](Instruction *Cur) { - if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) { - // We are expecting a icmp/select pair, which we go to the next select - // instruction if we can. We already know that Cur has 2 uses. - if (isa(*Cur->user_begin())) - return cast(*Cur->user_begin()); - else - return cast(*std::next(Cur->user_begin())); - } - return cast(*Cur->user_begin()); - }; - auto isCorrectOpcode = [&](Instruction *Cur) { - if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) { - Value *LHS, *RHS; - return SelectPatternResult::isMinOrMax( - matchSelectPattern(Cur, LHS, RHS).Flavor); - } - // Recognize a call to the llvm.fmuladd intrinsic. - if (isFMulAddIntrinsic(Cur)) - return true; - - return Cur->getOpcode() == RedOp; - }; - - // The loop exit instruction we check first (as a quick test) but add last. We - // check the opcode is correct (and dont allow them to be Subs) and that they - // have expected to have the expected number of uses. They will have one use - // from the phi and one from a LCSSA value, no matter the type. - if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2)) - return {}; - - // Check that the Phi has one (or two for min/max) uses. - if (!Phi->hasNUses(ExpectedUses)) - return {}; - Instruction *Cur = getNextInstruction(Phi); - - // Each other instruction in the chain should have the expected number of uses - // and be the correct opcode. - while (Cur != LoopExitInstr) { - if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses)) - return {}; - - ReductionOperations.push_back(Cur); - Cur = getNextInstruction(Cur); - } - - ReductionOperations.push_back(Cur); - return ReductionOperations; -} - InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, BinaryOperator *BOp, Type *ElementType, Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7727,7 +7727,7 @@ // finding the chain of operations that leads from the phi to the loop // exit value. SmallVector ReductionOperations = - RdxDesc.getReductionOpChain(Phi, TheLoop); + RdxDesc.getReductionChain(); bool InLoop = !ReductionOperations.empty(); if (InLoop) { InLoopReductionChains[Phi] = ReductionOperations;