diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -430,7 +430,8 @@ ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, - SmallVector &DeadInsts); + SmallVector &DeadInsts, + unsigned SCEVNAryLimit = 32); /// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for /// \p OrigLoop and the following distribution of \p OrigLoop iteration among \p diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -118,6 +118,11 @@ clEnumValN(AlwaysRepl, "always", "always replace exit value whenever possible"))); +static cl::opt SCEVNAryLimit( + "scev-nary-limit", cl::Hidden, cl::init(32), + cl::desc("Limitation of the count of NAry-type operands to determine " + "whether to expand a NArySCEVExpr for replacing loop exit value")); + static cl::opt UsePostIncrementRanges( "indvars-post-increment-ranges", cl::Hidden, cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), @@ -1774,7 +1779,7 @@ // of the current expressions. 
if (ReplaceExitValue != NeverRepl) { if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT, - ReplaceExitValue, DeadInsts)) { + ReplaceExitValue, DeadInsts, SCEVNAryLimit)) { NumReplaced += Rewrites; Changed = true; } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1295,7 +1295,8 @@ const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, - SmallVector &DeadInsts) { + SmallVector &DeadInsts, + unsigned SCEVNAryLimit) { // Check a pre-condition. assert(L->isRecursivelyLCSSAForm(*DT, *LI) && "Indvars did not preserve LCSSA!"); @@ -1304,6 +1305,32 @@ L->getUniqueExitBlocks(ExitBlocks); SmallVector RewritePhiSet; + + // If the SCEVNAryExpr has too many operands, the IR produced by + // expanding it becomes extremely complicated and can exhaust the stack + // whenever something later traverses that IR's operands. + // E.g. we might get a segmentation fault during ISel, because building + // the SelectionDAG needs to visit all the operands. + auto IsSCEVNAryExtremelyComplicated = [&](const SCEV *ExitValue) { + auto *NAry = cast(ExitValue); + SmallVector NAryVec; + NAryVec.push_back(NAry); + unsigned OpsCount = 0; + while (!NAryVec.empty()) { + NAry = NAryVec.pop_back_val(); + for (auto *Op : NAry->operands()) { + if (isa(Op)) { + NAryVec.push_back(cast(Op)); + } else { + ++OpsCount; + if (OpsCount > SCEVNAryLimit) { + return true; + } + } + } + } + return false; + }; // Find all values that are computed inside the loop, but used outside of it. // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan // the exit blocks of the loop to find them. @@ -1355,6 +1382,10 @@ // expression reuse by the SCEVExpander), but resort to per-exit // evaluation if that fails. 
const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); + if (isa(ExitValue) && + IsSCEVNAryExtremelyComplicated(ExitValue)) { + continue; + } if (isa(ExitValue) || !SE->isLoopInvariant(ExitValue, L) || !isSafeToExpand(ExitValue, *SE)) { diff --git a/llvm/test/Transforms/IndVarSimplify/avoid-rewriting-loop-exit-value.ll b/llvm/test/Transforms/IndVarSimplify/avoid-rewriting-loop-exit-value.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/avoid-rewriting-loop-exit-value.ll @@ -0,0 +1,51 @@ +;RUN: opt -indvars -S < %s | FileCheck %s + +%struct.ad = type { i8 } +%struct.ae = type { %struct.ad } +@aj = dso_local global [5 x [0 x [5 x %struct.ad]]] zeroinitializer, align 1 +@al = dso_local global %struct.ae zeroinitializer, align 1 + +define dso_local i32 @_Z2aqv() local_unnamed_addr { +; CHECK-LABEL: @_Z2aqv( +; CHECK: _ZL2apiPiiS_.exit: +; CHECK-NEXT: [[ADD_I_2_I_LCSSA:%.*]] = phi i32 [ [[ADD_I_2_I:%.*]], [[FOR_COND4_PREHEADER_I:%.*]] ] +; CHECK-NEXT: [[CONV8_I:%.*]] = sext i32 [[ADD_I_2_I_LCSSA:%.*]] to i64 +; CHECK-NEXT: ret i32 0 +; +entry: + br label %for.cond4.preheader.i + +for.cond4.preheader.i: ; preds = %for.cond4.preheader.i, %entry + %av.addr.04.i = phi i32 [ 0, %entry ], [ %sub.i.2.i, %for.cond4.preheader.i ] + %aw.sroa.0.03.i = phi i32 [ 0, %entry ], [ %dec.i, %for.cond4.preheader.i ] + %add.i.i = add nsw i32 %av.addr.04.i, zext (i1 icmp eq (%struct.ad* bitcast (i8* getelementptr inbounds ([5 x [0 x [5 x %struct.ad]]], [5 x [0 x [5 x %struct.ad]]]* @aj, i64 0, i64 0, i64 9, i64 1, i32 0) to %struct.ad*), %struct.ad* getelementptr inbounds (%struct.ae, %struct.ae* @al, i64 0, i32 0)) to i32) + %cmp.i.i = icmp sgt i32 %av.addr.04.i, 7 + %add.i.i.op = shl i32 %add.i.i, 16 + %add.i.i.op.op = ashr i32 %add.i.i.op, 31 + %add.i.i.op.op.op = xor i32 %add.i.i.op.op, -1 + %sext.i.lobit.not = select i1 %cmp.i.i, i32 -1, i32 %add.i.i.op.op.op + %sub.i.i = add i32 %sext.i.lobit.not, %av.addr.04.i + %add.i.1.i = add nsw i32 
%sub.i.i, zext (i1 icmp eq (%struct.ad* bitcast (i8* getelementptr inbounds ([5 x [0 x [5 x %struct.ad]]], [5 x [0 x [5 x %struct.ad]]]* @aj, i64 0, i64 0, i64 9, i64 1, i32 0) to %struct.ad*), %struct.ad* getelementptr inbounds (%struct.ae, %struct.ae* @al, i64 0, i32 0)) to i32) + %cmp.i.1.i = icmp sgt i32 %sub.i.i, 7 + %add.i.1.i.op = shl i32 %add.i.1.i, 16 + %add.i.1.i.op.op = ashr i32 %add.i.1.i.op, 31 + %add.i.1.i.op.op.op = xor i32 %add.i.1.i.op.op, -1 + %sext.1.i.lobit.not = select i1 %cmp.i.1.i, i32 -1, i32 %add.i.1.i.op.op.op + %sub.i.1.i = add i32 %sext.1.i.lobit.not, %sub.i.i + %add.i.2.i = add nsw i32 %sub.i.1.i, zext (i1 icmp eq (%struct.ad* bitcast (i8* getelementptr inbounds ([5 x [0 x [5 x %struct.ad]]], [5 x [0 x [5 x %struct.ad]]]* @aj, i64 0, i64 0, i64 9, i64 1, i32 0) to %struct.ad*), %struct.ad* getelementptr inbounds (%struct.ae, %struct.ae* @al, i64 0, i32 0)) to i32) + %cmp.i.2.i = icmp sgt i32 %sub.i.1.i, 7 + %add.i.2.i.op = shl i32 %add.i.2.i, 16 + %add.i.2.i.op.op = ashr i32 %add.i.2.i.op, 31 + %add.i.2.i.op.op.op = xor i32 %add.i.2.i.op.op, -1 + %sext.2.i.lobit.not = select i1 %cmp.i.2.i, i32 -1, i32 %add.i.2.i.op.op.op + %sub.i.2.i = add i32 %sext.2.i.lobit.not, %sub.i.1.i + %dec.i = add nsw i32 %aw.sroa.0.03.i, -1 + %cmp.i = icmp sgt i32 %aw.sroa.0.03.i, -10 + br i1 %cmp.i, label %for.cond4.preheader.i, label %_ZL2apiPiiS_.exit + +_ZL2apiPiiS_.exit: ; preds = %for.cond4.preheader.i + %add.i.2.i.lcssa = phi i32 [ %add.i.2.i, %for.cond4.preheader.i ] + %conv8.i = sext i32 %add.i.2.i.lcssa to i64 + ret i32 0 +} +