Index: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1807,6 +1807,23 @@ bool SCEVExpander::isHighCostExpansionHelper( const SCEV *S, Loop *L, SmallPtrSetImpl &Processed) { + + // Zero/One operand expressions + switch (S->getSCEVType()) { + case scUnknown: + case scConstant: + return false; + case scTruncate: + return isHighCostExpansionHelper(cast(S)->getOperand(), L, + Processed); + case scZeroExtend: + return isHighCostExpansionHelper(cast(S)->getOperand(), + L, Processed); + case scSignExtend: + return isHighCostExpansionHelper(cast(S)->getOperand(), + L, Processed); + } + if (!Processed.insert(S).second) return false; @@ -1849,23 +1866,22 @@ } } - // Recurse past add expressions, which commonly occur in the + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa(S) || isa(S)) + return true; + + // Recurse past nary expressions, which commonly occur in the // BackedgeTakenCount. They may already exist in program code, and if not, // they are not too expensive rematerialize. - if (const SCEVAddExpr *Add = dyn_cast(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + if (const SCEVNAryExpr *NAry = dyn_cast(S)) { + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { if (isHighCostExpansionHelper(*I, L, Processed)) return true; } - return false; } - // HowManyLessThans uses a Max expression whenever the loop is not guarded by - // the exit condition. - if (isa(S) || isa(S)) - return true; - // If we haven't recognized an expensive SCEV pattern, assume it's an // expression produced by program code. return false; Index: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp +++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -68,6 +68,22 @@ static cl::opt ReduceLiveIVs("liv-reduce", cl::Hidden, cl::desc("Reduce live induction variables.")); +enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl }; + +static cl::opt ReplaceExitValue( + "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), + cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), + cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), + clEnumValN(OnlyCheapRepl, "cheap", + "only replace exit value when the cost is cheap"), + clEnumValN(AlwaysRepl, "always", + "always replace exit value whenever possible"), + clEnumValEnd)); + +namespace { +struct RewritePhi; +} + namespace { class IndVarSimplify : public LoopPass { LoopInfo *LI; @@ -112,6 +128,7 @@ void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM); + bool CanLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, @@ -464,6 +481,21 @@ SE->forgetLoop(L); } +namespace { +// Collect information about PHI nodes which can be transformed in +// RewriteLoopExitValues. +struct RewritePhi { + PHINode *PN; + unsigned Ith; // Ith incoming value. + Value *Val; // Exit value after expansion. + bool HighCost; // High Cost when expansion. + bool SafePhi; // LCSSASafePhiForRAUW. + + RewritePhi(PHINode *P, unsigned I, Value *V, bool H, bool S) + : PN(P), Ith(I), Val(V), HighCost(H), SafePhi(S) {} +}; +} + //===----------------------------------------------------------------------===// // RewriteLoopExitValues - Optimize IV users outside the loop. // As a side effect, reduces the amount of IV processing within the loop. @@ -486,6 +518,7 @@ SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); + SmallVector RewritePhiSet; // Find all values that are computed inside the loop, but used outside of it. // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan // the exit blocks of the loop to find them. @@ -604,31 +637,111 @@ DeadInsts.push_back(ExitVal); continue; } - Changed = true; - ++NumReplaced; - - PN->setIncomingValue(i, ExitVal); + bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L); - // If this instruction is dead now, delete it. Don't do it now to avoid - // invalidating iterators. - if (isInstructionTriviallyDead(Inst, TLI)) - DeadInsts.push_back(Inst); - - // If we determined that this PHI is safe to replace even if an LCSSA - // PHI, do so. - if (LCSSASafePhiForRAUW) { - PN->replaceAllUsesWith(ExitVal); - PN->eraseFromParent(); - } + // Collect all the candidate PHINodes to be rewritten. + RewritePhiSet.push_back( + RewritePhi(PN, i, ExitVal, HighCost, LCSSASafePhiForRAUW)); } } } + bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet); + + // Transformation. + for (const RewritePhi &Phi : RewritePhiSet) { + PHINode *PN = Phi.PN; + Value *ExitVal = Phi.Val; + + // Only do the rewrite when the ExitValue can be expanded cheaply. + // If LoopCanBeDel is true, rewrite exit value aggressively. + if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) { + DeadInsts.push_back(ExitVal); + continue; + } + + Changed = true; + ++NumReplaced; + Instruction *Inst = cast(PN->getIncomingValue(Phi.Ith)); + PN->setIncomingValue(Phi.Ith, ExitVal); + + // If this instruction is dead now, delete it. Don't do it now to avoid + // invalidating iterators. + if (isInstructionTriviallyDead(Inst, TLI)) + DeadInsts.push_back(Inst); + + // If we determined that this PHI is safe to replace even if an LCSSA + // PHI, do so. + if (Phi.SafePhi) { + PN->replaceAllUsesWith(ExitVal); + PN->eraseFromParent(); + } + } + // The insertion point instruction may have been deleted; clear it out // so that the rewriter doesn't trip over it later. Rewriter.clearInsertPoint(); } +/// CanLoopBeDeleted - Check whether it is possible to delete the loop after +/// rewriting exit value. If it is possible, ignore ReplaceExitValue and +/// do rewriting aggressively. +bool IndVarSimplify::CanLoopBeDeleted( + Loop *L, SmallVector &RewritePhiSet) { + + BasicBlock *Preheader = L->getLoopPreheader(); + // If there is no preheader, the loop will not be deleted. + if (!Preheader) + return false; + + // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. + // We obviate multiple ExitingBlocks case for simplicity. + // TODO: If we see testcase with multiple ExitingBlocks can be deleted + // after exit value rewriting, we can enhance the logic here. + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + SmallVector ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1) + return false; + + BasicBlock *ExitBlock = ExitBlocks[0]; + BasicBlock::iterator BI = ExitBlock->begin(); + while (PHINode *P = dyn_cast(BI)) { + Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); + + // If the Incoming value of P is found in RewritePhiSet, we know it + // could be rewritten to use a loop invariant value in transformation + // phase later. Skip it in the loop invariant check below. + bool found = false; + for (const RewritePhi &Phi : RewritePhiSet) { + unsigned i = Phi.Ith; + if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { + found = true; + break; + } + } + + Instruction *I; + if (!found && (I = dyn_cast(Incoming))) + if (!L->hasLoopInvariantOperands(I)) + return false; + + ++BI; + } + + for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); + LI != LE; ++LI) { + for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE; + ++BI) { + if (BI->mayHaveSideEffects()) + return false; + } + } + + return true; +} + //===----------------------------------------------------------------------===// // IV Widening - Extend the width of an IV to cover its widest uses. //===----------------------------------------------------------------------===// @@ -1867,7 +1980,8 @@ // loop into any instructions outside of the loop that use the final values of // the current expressions. // - if (!isa(BackedgeTakenCount)) + if (ReplaceExitValue != NeverRepl && + !isa(BackedgeTakenCount)) RewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV cycles. Index: llvm/trunk/test/Transforms/IndVarSimplify/exit_value_test2.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/exit_value_test2.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/exit_value_test2.ll @@ -0,0 +1,52 @@ +; PR23538 +; RUN: opt < %s -indvars -loop-deletion -S | FileCheck %s + +; Check IndVarSimplify should not replace exit value because or else +; udiv will be introduced by expand and the cost will be high. +; +; CHECK-LABEL: @_Z3fooPKcjj( +; CHECK-NOT: udiv + +declare void @_Z3mixRjj(i32* dereferenceable(4), i32) +declare void @llvm.lifetime.start(i64, i8* nocapture) +declare void @llvm.lifetime.end(i64, i8* nocapture) + +define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) { +entry: + %a = alloca i32, align 4 + %tmp = bitcast i32* %a to i8* + call void @llvm.lifetime.start(i64 4, i8* %tmp) + store i32 -1640531527, i32* %a, align 4 + %cmp8 = icmp ugt i32 %len, 11 + br i1 %cmp8, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body, %while.body.lr.ph + %keylen.010 = phi i32 [ %len, %while.body.lr.ph ], [ %sub, %while.body ] + %s.addr.09 = phi i8* [ %s, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %tmp1 = bitcast i8* %s.addr.09 to i32* + %tmp2 = load i32, i32* %tmp1, align 4 + %shl.i = shl i32 %tmp2, 1 + %and.i = and i32 %shl.i, 16843008 + %tmp3 = load i32, i32* %a, align 4 + %sub.i = add i32 %tmp3, %tmp2 + %add = sub i32 %sub.i, %and.i + store i32 %add, i32* %a, align 4 + %add.ptr = getelementptr inbounds i8, i8* %s.addr.09, i64 12 + %sub = add i32 %keylen.010, -12 + %cmp = icmp ugt i32 %sub, 11 + br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge + +while.cond.while.end_crit_edge: ; preds = %while.body + %sub.lcssa = phi i32 [ %sub, %while.body ] + br label %while.end + +while.end: ; preds = %while.cond.while.end_crit_edge, %entry + %keylen.0.lcssa = phi i32 [ %sub.lcssa, %while.cond.while.end_crit_edge ], [ %len, %entry ] + call void @_Z3mixRjj(i32* dereferenceable(4) %a, i32 %keylen.0.lcssa) + %tmp4 = load i32, i32* %a, align 4 + call void @llvm.lifetime.end(i64 4, i8* %tmp) + ret i32 %tmp4 +} Index: llvm/trunk/test/Transforms/IndVarSimplify/exit_value_test3.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/exit_value_test3.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/exit_value_test3.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -indvars -loop-deletion -S |FileCheck %s + +; Check IndVarSimplify should replace exit value even if the expansion cost +; is high because the loop can be deleted after the exit value rewrite. +; +; CHECK-LABEL: @_Z3fooPKcjj( +; CHECK: udiv +; CHECK: [[LABEL:^[a-zA-Z0-9_.]+]]: +; CHECK-NOT: br {{.*}} [[LABEL]] + +define i32 @_Z3fooPKcjj(i8* nocapture readnone %s, i32 %len, i32 %c) #0 { +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %klen.0 = phi i32 [ %len, %entry ], [ %sub, %while.cond ] + %cmp = icmp ugt i32 %klen.0, 11 + %sub = add i32 %klen.0, -12 + br i1 %cmp, label %while.cond, label %while.end + +while.end: ; preds = %while.cond + %klen.0.lcssa = phi i32 [ %klen.0, %while.cond ] + ret i32 %klen.0.lcssa +} Index: llvm/trunk/test/Transforms/IndVarSimplify/lcssa-preservation.ll =================================================================== --- llvm/trunk/test/Transforms/IndVarSimplify/lcssa-preservation.ll +++ llvm/trunk/test/Transforms/IndVarSimplify/lcssa-preservation.ll @@ -1,5 +1,4 @@ -; RUN: opt < %s -indvars -S | FileCheck %s -; +; RUN: opt < %s -indvars -replexitval=always -S | FileCheck %s ; Make sure IndVars preserves LCSSA form, especially across loop nests. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"