Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -1807,6 +1807,23 @@ bool SCEVExpander::isHighCostExpansionHelper( const SCEV *S, Loop *L, SmallPtrSetImpl &Processed) { + + // Zero/One operand expressions + switch (S->getSCEVType()) { + case scUnknown: + case scConstant: + return false; + case scTruncate: + return isHighCostExpansionHelper(cast(S)->getOperand(), + L, Processed); + case scZeroExtend: + return isHighCostExpansionHelper(cast(S)->getOperand(), + L, Processed); + case scSignExtend: + return isHighCostExpansionHelper(cast(S)->getOperand(), + L, Processed); + } + if (!Processed.insert(S).second) return false; @@ -1849,23 +1866,22 @@ } } - // Recurse past add expressions, which commonly occur in the + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa(S) || isa(S)) + return true; + + // Recurse past nary expressions, which commonly occur in the // BackedgeTakenCount. They may already exist in program code, and if not, // they are not too expensive rematerialize. - if (const SCEVAddExpr *Add = dyn_cast(S)) { - for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + if (const SCEVNAryExpr *NAry = dyn_cast(S)) { + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { if (isHighCostExpansionHelper(*I, L, Processed)) return true; } - return false; } - // HowManyLessThans uses a Max expression whenever the loop is not guarded by - // the exit condition. - if (isa(S) || isa(S)) - return true; - // If we haven't recognized an expensive SCEV pattern, assume it's an // expression produced by program code. return false; Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -68,6 +68,22 @@ static cl::opt ReduceLiveIVs("liv-reduce", cl::Hidden, cl::desc("Reduce live induction variables.")); +enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, AlwaysRepl }; + +static cl::opt ReplaceExitValue( + "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), + cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), + cl::values( + clEnumValN(NeverRepl, "never", "never replace exit value"), + clEnumValN(OnlyCheapRepl, "cheap", + "only replace exit value when the cost is cheap"), + clEnumValN(AlwaysRepl, "always", + "always replace exit value whenever possible"), + clEnumValEnd)); + +static cl::opt ReplaceExitVal("repl-exitval", cl::Hidden, + cl::desc(".")); + namespace { class IndVarSimplify : public LoopPass { LoopInfo *LI; @@ -595,6 +611,12 @@ continue; } + // Only do the rewrite when the ExitValue can be expanded cheaply. + if (ReplaceExitValue == OnlyCheapRepl && + Rewriter.isHighCostExpansion(ExitValue, L)) { + continue; + } + Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' @@ -1878,7 +1900,8 @@ // loop into any instructions outside of the loop that use the final values of // the current expressions. // - if (!isa(BackedgeTakenCount)) + if (ReplaceExitValue != NeverRepl && + !isa(BackedgeTakenCount)) RewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV cycles. Index: test/Transforms/IndVarSimplify/exit_value_test2.ll =================================================================== --- test/Transforms/IndVarSimplify/exit_value_test2.ll +++ test/Transforms/IndVarSimplify/exit_value_test2.ll @@ -0,0 +1,52 @@ +; PR23538 +; RUN: opt < %s -indvars -S | FileCheck %s + +; Check IndVarSimplify should not replace exit value because or else +; udiv will be introduced by expand and the cost will be high. +; +; CHECK-LABEL: define void @_Z3fooPKcjj( +; CHECK: udiv + +declare void @_Z3mixRjj(i32* dereferenceable(4), i32) +declare void @llvm.lifetime.start(i64, i8* nocapture) +declare void @llvm.lifetime.end(i64, i8* nocapture) + +define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) { +entry: + %a = alloca i32, align 4 + %tmp = bitcast i32* %a to i8* + call void @llvm.lifetime.start(i64 4, i8* %tmp) + store i32 -1640531527, i32* %a, align 4 + %cmp8 = icmp ugt i32 %len, 11 + br i1 %cmp8, label %while.body.lr.ph, label %while.end + +while.body.lr.ph: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body, %while.body.lr.ph + %keylen.010 = phi i32 [ %len, %while.body.lr.ph ], [ %sub, %while.body ] + %s.addr.09 = phi i8* [ %s, %while.body.lr.ph ], [ %add.ptr, %while.body ] + %tmp1 = bitcast i8* %s.addr.09 to i32* + %tmp2 = load i32, i32* %tmp1, align 4 + %shl.i = shl i32 %tmp2, 1 + %and.i = and i32 %shl.i, 16843008 + %tmp3 = load i32, i32* %a, align 4 + %sub.i = add i32 %tmp3, %tmp2 + %add = sub i32 %sub.i, %and.i + store i32 %add, i32* %a, align 4 + %add.ptr = getelementptr inbounds i8, i8* %s.addr.09, i64 12 + %sub = add i32 %keylen.010, -12 + %cmp = icmp ugt i32 %sub, 11 + br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge + +while.cond.while.end_crit_edge: ; preds = %while.body + %sub.lcssa = phi i32 [ %sub, %while.body ] + br label %while.end + +while.end: ; preds = %while.cond.while.end_crit_edge, %entry + %keylen.0.lcssa = phi i32 [ %sub.lcssa, %while.cond.while.end_crit_edge ], [ %len, %entry ] + call void @_Z3mixRjj(i32* dereferenceable(4) %a, i32 %keylen.0.lcssa) + %tmp4 = load i32, i32* %a, align 4 + call void @llvm.lifetime.end(i64 4, i8* %tmp) + ret i32 %tmp4 +} Index: test/Transforms/IndVarSimplify/exit_value_tests.ll =================================================================== --- test/Transforms/IndVarSimplify/exit_value_tests.ll +++ test/Transforms/IndVarSimplify/exit_value_tests.ll @@ -2,7 +2,7 @@ ; these loops all have predictable exit values we can replace the use outside ; of the loop with a closed-form computation, making the loop dead. ; -; RUN: opt < %s -indvars -loop-deletion -simplifycfg | \ +; RUN: opt < %s -indvars -replexitval=always -loop-deletion -simplifycfg | \ ; RUN: llvm-dis | not grep br define i32 @polynomial_constant() { Index: test/Transforms/IndVarSimplify/lcssa-preservation.ll =================================================================== --- test/Transforms/IndVarSimplify/lcssa-preservation.ll +++ test/Transforms/IndVarSimplify/lcssa-preservation.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -indvars -S | FileCheck %s +; RUN: opt < %s -indvars -replexitval=always -S | FileCheck %s ; ; Make sure IndVars preserves LCSSA form, especially across loop nests. Index: test/Transforms/IndVarSimplify/loop_evaluate_1.ll =================================================================== --- test/Transforms/IndVarSimplify/loop_evaluate_1.ll +++ test/Transforms/IndVarSimplify/loop_evaluate_1.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -indvars -loop-deletion -simplifycfg -S | FileCheck %s +; RUN: opt < %s -indvars -replexitval=always -loop-deletion -simplifycfg -S | FileCheck %s ; Testcase distilled from 256.bzip2 ; CHECK-LABEL: @test1 Index: test/Transforms/IndVarSimplify/loop_evaluate_6.ll =================================================================== --- test/Transforms/IndVarSimplify/loop_evaluate_6.ll +++ test/Transforms/IndVarSimplify/loop_evaluate_6.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -indvars -loop-deletion -S | grep phi | count 1 +; RUN: opt < %s -indvars -replexitval=always -loop-deletion -S | grep phi | count 1 define i32 @test(i32 %x_offs) nounwind readnone { entry: