diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2931,15 +2931,6 @@ Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); - // Limit recursion calls depth. - if (Depth > MaxArithDepth || hasHugeExpression(Ops)) - return getOrCreateMulExpr(Ops, Flags); - - if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) { - static_cast(S)->setNoWrapFlags(Flags); - return S; - } - // If there are any constants, fold them together. unsigned Idx = 0; if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { @@ -2966,8 +2957,8 @@ ConstantInt *Fold = ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt()); Ops[0] = getConstant(Fold); + if (Ops.size() == 2) return Ops[0]; Ops.erase(Ops.begin()+1); // Erase the folded element - if (Ops.size() == 1) return Ops[0]; LHSC = cast(Ops[0]); } @@ -3010,6 +3001,15 @@ return Ops[0]; } + // Limit recursion calls depth. + if (Depth > MaxArithDepth || hasHugeExpression(Ops)) + return getOrCreateMulExpr(Ops, Flags); + + if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) { + static_cast(S)->setNoWrapFlags(Flags); + return S; + } + // Skip over the add expression until we get to a multiply. while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) ++Idx; diff --git a/llvm/test/Analysis/ScalarEvolution/depth-limit-overrun.ll b/llvm/test/Analysis/ScalarEvolution/depth-limit-overrun.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/depth-limit-overrun.ll @@ -0,0 +1,68 @@ +; RUN: opt -passes 'strength-reduce' -scalar-evolution-max-arith-depth=2 -S < %s | FileCheck %s +; RUN: opt -loop-reduce -scalar-evolution-max-arith-depth=2 -S < %s | FileCheck %s + +; This test should just compile cleanly without assertions. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" + +define void @test(i32 %A, i32 %B, i32 %C) { +; CHECK-LABEL: @test( +; CHECK: inner_loop: +; CHECK-NEXT: [[LSR_IV3:%.*]] = phi i32 +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i32 +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 +; CHECK: [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 3 +; CHECK-NEXT: [[LSR_IV_NEXT2:%.*]] = add i32 [[LSR_IV1]], 3 +; CHECK-NEXT: [[LSR_IV_NEXT4:%.*]] = add i32 [[LSR_IV3]], -3 +; +entry: + br label %outer_loop + +outer_loop: + %phi2 = phi i32 [ %A, %entry ], [ 204, %outer_tail ] + %phi3 = phi i32 [ %A, %entry ], [ 243, %outer_tail ] + %phi4 = phi i32 [ %B, %entry ], [ %i35, %outer_tail ] + br label %guard + +guard: + %lcmp.mod = icmp eq i32 %C, 0 + br i1 %lcmp.mod, label %outer_tail, label %preheader + +preheader: + %i15 = shl i32 %B, 1 + br label %inner_loop + +inner_loop: + %phi5 = phi i32 [ %phi3, %preheader ], [ %i30, %inner_loop ] + %phi6 = phi i32 [ %phi2, %preheader ], [ %i33, %inner_loop ] + %iter = phi i32 [ %C, %preheader ], [ %iter.sub, %inner_loop ] + %i17 = sub i32 %phi4, %phi6 + %i18 = sub i32 14, %phi5 + %i19 = mul i32 %i18, %C + %factor.prol = shl i32 %phi5, 1 + %i20 = add i32 %i17, %factor.prol + %i21 = add i32 %i20, %B + %i22 = add i32 %i21, %i19 + %i23 = sub i32 14, %i22 + %i24 = mul i32 %i23, %C + %factor.1.prol = shl i32 %i22, 1 + %i25 = add i32 %i17, %factor.1.prol + %i27 = add i32 %i25, %i24 + %i29 = mul i32 %i25, %C + %factor.2.prol = shl i32 %i27, 1 + %i30 = add i32 %i17, %factor.2.prol + %i33 = add nsw i32 %phi6, -3 + %iter.sub = add i32 %iter, -1 + %iter.cmp = icmp eq i32 %iter.sub, 0 + br i1 %iter.cmp, label %outer_tail, label %inner_loop + +outer_tail: + %phi7 = phi i32 [ %phi2, %guard ], [ %i33, %inner_loop ] + %i35 = sub i32 %A, %phi7 + %cmp = icmp sgt i32 %i35, 9876 + br i1 %cmp, label %exit, label %outer_loop + +exit: + ret void + +} diff --git a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll --- a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll +++ b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll @@ -26,7 +26,7 @@ define void @test_mul(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { ; CHECK-LABEL: @test_mul ; CHECK: %s2 = mul i32 %s1, %p3 -; CHECK-NEXT: --> (2 * 3 * 4 * 5 * 6 * 7 * %a * %b * %c * %d * %e * %f) +; CHECK-NEXT: --> (5040 * %a * %b * %c * %d * %e * %f) %tmp0 = mul i32 %a, 2 %tmp1 = mul i32 %b, 3 %tmp2 = mul i32 %c, 4