Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -2051,7 +2051,8 @@ const DataLayout &DL = L->getHeader()->getParent()->getParent()->getDataLayout(); unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth(); - return DL.isIllegalInteger(Width); + return DL.isIllegalInteger(Width) || + isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, Processed); } // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or Index: test/Analysis/ScalarEvolution/expensive-expansion.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/expensive-expansion.ll @@ -0,0 +1,41 @@ +; RUN: opt -loop-unroll -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@g = global i32 0, align 4 + +; Function Attrs: nounwind uwtable +define void @fn(i32 %start) local_unnamed_addr #0 { +; CHECK-LABEL: fn +; CHECK-NOT: for.body.prol + +; LoopUnroll should not unroll this loop as the computed trip count is much +; too expensive. The SCEV under consideration is: +; ((2 + (-10 smax (-1 + (-1 * %start))) + %start) /u 2) +; SCEVExpander previously thought this was cheap as it had a legal +; division-by-power-of-2 on the RHS, but the LHS is quite large already. 
+ +entry: + %cmp1 = icmp sgt i32 %start, 7 + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %i.02 = phi i32 [ %sub, %for.body ], [ %start, %for.body.preheader ] + %0 = load volatile i32, i32* @g, align 4 + %inc = add nsw i32 %0, 1 + store volatile i32 %inc, i32* @g, align 4 + %sub = add nsw i32 %i.02, -2 + %cmp = icmp sgt i32 %i.02, 9 + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: test/Transforms/IndVarSimplify/no-iv-rewrite.ll =================================================================== --- test/Transforms/IndVarSimplify/no-iv-rewrite.ll +++ test/Transforms/IndVarSimplify/no-iv-rewrite.ll @@ -225,9 +225,9 @@ ; This test originally checked that the OR instruction was cloned. Now the ; ScalarEvolution is able to understand the loop evolution and that '%iv' at the -; end of the loop is an even value. Thus '%val' is computed at the end of the -; loop and the OR instruction is replaced by an ADD keeping the result -; equivalent. +; end of the loop is an even value. However, the expression is too expensive to +; compute as the loop is not guarded, so the OR is no longer cloned and turned +; into an add, as it was previously. 
; ; CHECK: sext ; CHECK: loop: @@ -235,7 +235,7 @@ ; CHECK-NOT: sext ; CHECK: icmp slt i64 ; CHECK: exit: -; CHECK: add i64 +; CHECK-NOT: add i64 loop: %iv = phi i32 [ 0, %entry], [ %iv.next, %loop ] %t1 = sext i32 %iv to i64