Index: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -263,6 +263,9 @@ const SCEV *const *op_end, PointerType *PTy, Type *Ty, Value *V); + /// \brief Find a previous Value in ExprValueMap for expand. + Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); + Value *expand(const SCEV *S); /// \brief Insert code to directly compute the specified SCEV expression Index: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1598,6 +1598,34 @@ return V; } +Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S, + const Instruction *InsertPt) { + SetVector *Set = SE.getSCEVValues(S); + // If the expansion is not in CanonicalMode, and the SCEV contains any + // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. + if (CanonicalMode || !SE.containsAddRecurrence(S)) { + // If S is scConstant, it may be worse to reuse an existing Value. + if (S->getSCEVType() != scConstant && Set) { + // Choose a Value from the set which dominates the insertPt. + // insertPt should be inside the Value's parent loop so as not to break + // the LCSSA form. + for (auto const &Ent : *Set) { + Instruction *EntInst = nullptr; + if (Ent && isa(Ent) && + (EntInst = cast(Ent)) && + S->getType() == Ent->getType() && + EntInst->getFunction() == InsertPt->getFunction() && + SE.DT.dominates(EntInst, InsertPt) && + (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || + SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) { + return Ent; + } + } + } + } + return nullptr; +} + // The expansion of SCEV will either reuse a previous Value in ExprValueMap, // or expand the SCEV literally. Specifically, if the expansion is in LSRMode, // and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded @@ -1643,31 +1671,8 @@ Builder.SetInsertPoint(InsertPt); // Expand the expression into instructions. - SetVector *Set = SE.getSCEVValues(S); - Value *V = nullptr; - // If the expansion is not in CanonicalMode, and the SCEV contains any - // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. - if (CanonicalMode || !SE.containsAddRecurrence(S)) { - // If S is scConstant, it may be worse to reuse an existing Value. - if (S->getSCEVType() != scConstant && Set) { - // Choose a Value from the set which dominates the insertPt. - // insertPt should be inside the Value's parent loop so as not to break - // the LCSSA form. - for (auto const &Ent : *Set) { - Instruction *EntInst = nullptr; - if (Ent && isa(Ent) && - (EntInst = cast(Ent)) && - S->getType() == Ent->getType() && - EntInst->getFunction() == InsertPt->getFunction() && - SE.DT.dominates(EntInst, InsertPt) && - (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || - SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) { - V = Ent; - break; - } - } - } - } + Value *V = FindValueInExprValueMap(S, InsertPt); + if (!V) V = visit(S); @@ -1877,6 +1882,11 @@ return RHS; } + // Use expand's logic which is used for reusing a previous Value in + // ExprValueMap. + if (Value *Val = FindValueInExprValueMap(S, At)) + return Val; + // There is potential to make this significantly smarter, but this simple // heuristic already gets some interesting cases. Index: llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -311,9 +311,12 @@ return false; BasicBlock *Header = L->getHeader(); + BasicBlock *PH = L->getLoopPreheader(); + BranchInst *PreHeaderBR = cast(PH->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); - if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L)) + if (!AllowExpensiveTripCount && + Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) return false; // We only handle cases when the unroll factor is a power of 2. @@ -331,13 +334,12 @@ if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); - BasicBlock *PH = L->getLoopPreheader(); BasicBlock *Latch = L->getLoopLatch(); // It helps to split the original preheader twice, one for the end of the // prolog code and one for a new loop preheader. BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI); BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI); - BranchInst *PreHeaderBR = cast(PH->getTerminator()); + PreHeaderBR = cast(PH->getTerminator()); // Compute the number of extra iterations required, which is: // extra iterations = run-time trip count % (loop unroll factor + 1) Index: llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll +++ llvm/trunk/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll @@ -24,4 +24,38 @@ ret i32 0 } +;; Though SCEV for loop tripcount contains division, +;; it shouldn't be considered expensive, since the division already +;; exists in the code and we don't need to expand it once more. +;; Thus, it shouldn't prevent us from unrolling the loop. + +define i32 @test2(i64* %loc, i64 %conv7) { +; CHECK-LABEL: @test2( +; CHECK: udiv +; CHECK: udiv +; CHECK-NOT: udiv +; CHECK-LABEL: for.body.prol +entry: + %rem0 = load i64, i64* %loc, align 8 + %ExpensiveComputation = udiv i64 %rem0, 42 ; <<< Extra computations are added to the trip-count expression + br label %bb1 +bb1: + %div11 = udiv i64 %ExpensiveComputation, %conv7 + %cmp.i38 = icmp ugt i64 %div11, 1 + %div12 = select i1 %cmp.i38, i64 %div11, i64 1 + br label %for.body +for.body: + %rem1 = phi i64 [ %rem0, %bb1 ], [ %rem2, %for.body ] + %k1 = phi i64 [ %div12, %bb1 ], [ %dec, %for.body ] + %mul1 = mul i64 %rem1, 48271 + %rem2 = urem i64 %mul1, 2147483647 + %dec = add i64 %k1, -1 + %cmp = icmp eq i64 %dec, 0 + br i1 %cmp, label %exit, label %for.body +exit: + %rem3 = phi i64 [ %rem2, %for.body ] + store i64 %rem3, i64* %loc, align 8 + ret i32 0 +} + !0 = !{i64 1, i64 100}