diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1575,9 +1575,17 @@ ConstantRange getRangeForUnknownRecurrence(const SCEVUnknown *U); /// We know that there is no SCEV for the specified value. Analyze the - /// expression. + /// expression recursively. const SCEV *createSCEV(Value *V); + /// We know that there is no SCEV for the specified value. Create a new SCEV + /// for \p V iteratively. + const SCEV *createSCEVIter(Value *V); + /// Collect operands of \p V for which SCEV expressions should be constructed + /// first. Returns a SCEV directly if it can be constructed trivially for \p + /// V; otherwise returns nullptr, leaving \p V to be built by createSCEV. + const SCEV *getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops); + /// Provide the special handling we need to analyze PHI SCEVs. const SCEV *createNodeForPHI(PHINode *PN); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4406,18 +4406,9 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); - const SCEV *S = getExistingSCEV(V); - if (S == nullptr) { - S = createSCEV(V); - // During PHI resolution, it is possible to create two SCEVs for the same - // V, so it is needed to double check whether V->S is inserted into - // ValueExprMap before insert S->{V, 0} into ExprValueMap. 
- std::pair Pair = - ValueExprMap.insert({SCEVCallbackVH(V, this), S}); - if (Pair.second) - ExprValueMap[S].insert(V); - } - return S; + if (const SCEV *S = getExistingSCEV(V)) + return S; + return createSCEVIter(V); } const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { @@ -7186,6 +7177,176 @@ return isFinite(L) || (isMustProgress(L) && loopHasNoSideEffects(L)); } +const SCEV *ScalarEvolution::createSCEVIter(Value *V) { + // Worklist item with a Value and a bool indicating whether all operands have + // been visited already. + using PointerTy = PointerIntPair<Value *, 1, bool>; + SmallVector<PointerTy> Stack; + + Stack.emplace_back(V, true); + Stack.emplace_back(V, false); + while (!Stack.empty()) { + auto E = Stack.pop_back_val(); + Value *CurV = E.getPointer(); + + if (getExistingSCEV(CurV)) + continue; + + SmallVector<Value *> Ops; + const SCEV *CreatedSCEV = nullptr; + // If all operands have been visited already, create the SCEV. + if (E.getInt()) { + CreatedSCEV = createSCEV(CurV); + } else { + // Otherwise get the operands we need to create SCEVs for before creating + // the SCEV for CurV. If the SCEV for CurV can be constructed trivially, + // just use it. + CreatedSCEV = getOperandsToCreate(CurV, Ops); + } + + if (CreatedSCEV) { + insertValueToMap(CurV, CreatedSCEV); + } else { + // Queue CurV for SCEV creation, followed by its operands which need to + // be constructed first. + Stack.emplace_back(CurV, true); + for (Value *Op : Ops) + Stack.emplace_back(Op, false); + } + } + + return getExistingSCEV(V); +} + +const SCEV * +ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) { + if (!isSCEVable(V->getType())) + return getUnknown(V); + + if (Instruction *I = dyn_cast<Instruction>(V)) { + // Don't attempt to analyze instructions in blocks that aren't + // reachable. Such instructions don't matter, and they aren't required + // to obey basic rules for definitions dominating uses which this + // analysis depends on. 
+ if (!DT.isReachableFromEntry(I->getParent())) + return getUnknown(PoisonValue::get(V->getType())); + } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) + return getConstant(CI); + else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (!GA->isInterposable()) { + Ops.push_back(GA->getAliasee()); + return nullptr; + } + return getUnknown(V); + } else if (!isa<ConstantExpr>(V)) + return getUnknown(V); + + Operator *U = cast<Operator>(V); + if (auto BO = MatchBinaryOp(U, DT)) { + bool IsConstArg = isa<ConstantInt>(BO->RHS); + switch (U->getOpcode()) { + case Instruction::AShr: + case Instruction::Shl: + case Instruction::Xor: + if (!IsConstArg) + return nullptr; + break; + case Instruction::And: + case Instruction::Or: + if (!IsConstArg && BO->LHS->getType()->isIntegerTy(1)) + return nullptr; + break; + default: + break; + } + + Ops.push_back(BO->LHS); + Ops.push_back(BO->RHS); + return nullptr; + } + + switch (U->getOpcode()) { + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::PtrToInt: + Ops.push_back(U->getOperand(0)); + return nullptr; + + case Instruction::BitCast: + if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) { + Ops.push_back(U->getOperand(0)); + return nullptr; + } + return getUnknown(V); + + case Instruction::SDiv: + case Instruction::SRem: + Ops.push_back(U->getOperand(0)); + Ops.push_back(U->getOperand(1)); + return nullptr; + + case Instruction::GetElementPtr: + if (cast<GEPOperator>(U)->getSourceElementType()->isSized()) { + for (Value *Index : U->operands()) + Ops.push_back(Index); + return nullptr; + } + return getUnknown(V); + + case Instruction::IntToPtr: + return getUnknown(V); + + case Instruction::PHI: + // Keep constructing SCEVs for phis recursively for now. + return nullptr; + + case Instruction::Select: + // U can also be a select constant expr, which we let fall through. 
Since + // createNodeForSelect only works for a condition that is an `ICmpInst`, and + // constant expressions cannot have instructions as operands, we'd have + // returned getUnknown for select constant expressions anyway. + if (isa<Instruction>(U)) { + for (Value *Inc : cast<Instruction>(U)->operands()) + Ops.push_back(Inc); + return nullptr; + } + return getUnknown(V); + + case Instruction::Call: + case Instruction::Invoke: + if (Value *RV = cast<CallBase>(U)->getReturnedArgOperand()) { + Ops.push_back(RV); + return nullptr; + } + + if (auto *II = dyn_cast<IntrinsicInst>(U)) { + switch (II->getIntrinsicID()) { + case Intrinsic::abs: + Ops.push_back(II->getArgOperand(0)); + return nullptr; + case Intrinsic::umax: + case Intrinsic::umin: + case Intrinsic::smax: + case Intrinsic::smin: + case Intrinsic::usub_sat: + case Intrinsic::uadd_sat: + Ops.push_back(II->getArgOperand(0)); + Ops.push_back(II->getArgOperand(1)); + return nullptr; + case Intrinsic::start_loop_iterations: + Ops.push_back(II->getArgOperand(0)); + return nullptr; + default: + break; + } + } + break; + } + + return nullptr; +} + const SCEV *ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) return getUnknown(V); diff --git a/llvm/test/Analysis/Delinearization/a.ll b/llvm/test/Analysis/Delinearization/a.ll --- a/llvm/test/Analysis/Delinearization/a.ll +++ b/llvm/test/Analysis/Delinearization/a.ll @@ -10,7 +10,7 @@ ; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k> ; CHECK: Base offset: %A ; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes. 
-; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>] +; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>] define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll @@ -11,7 +11,7 @@ ; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<%for.body6.us.us> ; CHECK: Base offset: %A ; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of 8 bytes. -; CHECK: ArrayRef[{3,+,1}<%for.cond4.preheader.lr.ph.us>][{-4,+,1}<%for.body6.lr.ph.us.us>][{7,+,1}<%for.body6.us.us>] +; CHECK: ArrayRef[{3,+,1}<%for.cond4.preheader.lr.ph.us>][{-4,+,1}<%for.body6.lr.ph.us.us>][{7,+,1}<%for.body6.us.us>] define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, double* nocapture %A) nounwind uwtable { entry: diff --git a/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll b/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll --- a/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll @@ -34,9 +34,7 @@ ; DEFAULT-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[INNER_LOOP]] ], [ [[TMP5]], [[PREHEADER]] ] ; DEFAULT-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[INNER_LOOP]] ], [ [[TMP3]], [[PREHEADER]] ] ; DEFAULT-NEXT: [[PHI5:%.*]] = phi i32 [ [[PHI3]], [[PREHEADER]] ], [ [[I30:%.*]], [[INNER_LOOP]] ] -; DEFAULT-NEXT: [[PHI6:%.*]] = phi i32 [ [[PHI2]], [[PREHEADER]] ], [ [[I33:%.*]], [[INNER_LOOP]] ] ; DEFAULT-NEXT: [[ITER:%.*]] = phi i32 [ [[C]], [[PREHEADER]] ], [ [[ITER_SUB:%.*]], 
[[INNER_LOOP]] ] -; DEFAULT-NEXT: [[I17:%.*]] = sub i32 [[PHI4]], [[PHI6]] ; DEFAULT-NEXT: [[I18:%.*]] = sub i32 14, [[PHI5]] ; DEFAULT-NEXT: [[I19:%.*]] = mul i32 [[I18]], [[C]] ; DEFAULT-NEXT: [[FACTOR_PROL:%.*]] = shl i32 [[PHI5]], 1 @@ -54,8 +52,7 @@ ; DEFAULT-NEXT: [[TMP19:%.*]] = add i32 [[LSR_IV]], [[TMP18]] ; DEFAULT-NEXT: [[I29:%.*]] = mul i32 [[TMP11]], [[C]] ; DEFAULT-NEXT: [[FACTOR_2_PROL:%.*]] = shl i32 [[TMP19]], 1 -; DEFAULT-NEXT: [[I30]] = add i32 [[I17]], [[FACTOR_2_PROL]] -; DEFAULT-NEXT: [[I33]] = add i32 [[PHI6]], -3 +; DEFAULT-NEXT: [[I30]] = add i32 [[LSR_IV]], [[FACTOR_2_PROL]] ; DEFAULT-NEXT: [[ITER_SUB]] = add i32 [[ITER]], -1 ; DEFAULT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 3 ; DEFAULT-NEXT: [[LSR_IV_NEXT2]] = add i32 [[LSR_IV1]], 3 @@ -74,61 +71,69 @@ ; ; LIMIT-LABEL: @test( ; LIMIT-NEXT: entry: +; LIMIT-NEXT: [[TMP0:%.*]] = shl i32 [[B:%.*]], 1 +; LIMIT-NEXT: [[TMP1:%.*]] = shl i32 [[C:%.*]], 1 ; LIMIT-NEXT: br label [[OUTER_LOOP:%.*]] ; LIMIT: outer_loop: ; LIMIT-NEXT: [[PHI2:%.*]] = phi i32 [ [[A:%.*]], [[ENTRY:%.*]] ], [ 204, [[OUTER_TAIL:%.*]] ] ; LIMIT-NEXT: [[PHI3:%.*]] = phi i32 [ [[A]], [[ENTRY]] ], [ 243, [[OUTER_TAIL]] ] -; LIMIT-NEXT: [[PHI4:%.*]] = phi i32 [ [[B:%.*]], [[ENTRY]] ], [ [[I35:%.*]], [[OUTER_TAIL]] ] +; LIMIT-NEXT: [[PHI4:%.*]] = phi i32 [ [[B]], [[ENTRY]] ], [ [[I35:%.*]], [[OUTER_TAIL]] ] ; LIMIT-NEXT: br label [[GUARD:%.*]] ; LIMIT: guard: -; LIMIT-NEXT: [[LCMP_MOD:%.*]] = icmp eq i32 [[C:%.*]], 0 +; LIMIT-NEXT: [[LCMP_MOD:%.*]] = icmp eq i32 [[C]], 0 ; LIMIT-NEXT: br i1 [[LCMP_MOD]], label [[OUTER_TAIL]], label [[PREHEADER:%.*]] ; LIMIT: preheader: ; LIMIT-NEXT: [[I15:%.*]] = shl i32 [[B]], 1 -; LIMIT-NEXT: [[TMP0:%.*]] = mul i32 [[PHI2]], -1 -; LIMIT-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], -1 -; LIMIT-NEXT: [[TMP2:%.*]] = sub i32 [[PHI4]], [[TMP1]] -; LIMIT-NEXT: [[TMP3:%.*]] = add i32 [[B]], [[PHI4]] -; LIMIT-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], [[TMP1]] -; LIMIT-NEXT: [[TMP5:%.*]] = sub i32 14, 
[[TMP4]] +; LIMIT-NEXT: [[TMP2:%.*]] = mul i32 [[PHI2]], -1 +; LIMIT-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], -1 +; LIMIT-NEXT: [[TMP4:%.*]] = sub i32 [[PHI4]], [[TMP3]] +; LIMIT-NEXT: [[TMP5:%.*]] = mul i32 [[PHI4]], 3 +; LIMIT-NEXT: [[TMP6:%.*]] = add i32 [[TMP0]], [[TMP5]] +; LIMIT-NEXT: [[TMP7:%.*]] = mul i32 [[PHI2]], 3 +; LIMIT-NEXT: [[TMP8:%.*]] = sub i32 [[TMP6]], [[TMP7]] +; LIMIT-NEXT: [[TMP9:%.*]] = add i32 [[B]], [[PHI4]] +; LIMIT-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], [[TMP3]] +; LIMIT-NEXT: [[TMP11:%.*]] = sub i32 14, [[TMP10]] ; LIMIT-NEXT: br label [[INNER_LOOP:%.*]] ; LIMIT: inner_loop: -; LIMIT-NEXT: [[LSR_IV3:%.*]] = phi i32 [ [[LSR_IV_NEXT4:%.*]], [[INNER_LOOP]] ], [ [[TMP5]], [[PREHEADER]] ] -; LIMIT-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[INNER_LOOP]] ], [ [[TMP4]], [[PREHEADER]] ] -; LIMIT-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[INNER_LOOP]] ], [ [[TMP2]], [[PREHEADER]] ] +; LIMIT-NEXT: [[LSR_IV3:%.*]] = phi i32 [ [[LSR_IV_NEXT4:%.*]], [[INNER_LOOP]] ], [ [[TMP11]], [[PREHEADER]] ] +; LIMIT-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[INNER_LOOP]] ], [ 0, [[PREHEADER]] ] +; LIMIT-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[INNER_LOOP]] ], [ 0, [[PREHEADER]] ] ; LIMIT-NEXT: [[PHI5:%.*]] = phi i32 [ [[PHI3]], [[PREHEADER]] ], [ [[I30:%.*]], [[INNER_LOOP]] ] -; LIMIT-NEXT: [[PHI6:%.*]] = phi i32 [ [[PHI2]], [[PREHEADER]] ], [ [[I33:%.*]], [[INNER_LOOP]] ] ; LIMIT-NEXT: [[ITER:%.*]] = phi i32 [ [[C]], [[PREHEADER]] ], [ [[ITER_SUB:%.*]], [[INNER_LOOP]] ] -; LIMIT-NEXT: [[I17:%.*]] = sub i32 [[PHI4]], [[PHI6]] +; LIMIT-NEXT: [[TMP12:%.*]] = add i32 [[TMP4]], [[LSR_IV]] ; LIMIT-NEXT: [[I18:%.*]] = sub i32 14, [[PHI5]] ; LIMIT-NEXT: [[I19:%.*]] = mul i32 [[I18]], [[C]] -; LIMIT-NEXT: [[FACTOR_PROL:%.*]] = shl i32 [[PHI5]], 1 -; LIMIT-NEXT: [[TMP6:%.*]] = add i32 [[LSR_IV1]], [[I19]] -; LIMIT-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[FACTOR_PROL]] -; LIMIT-NEXT: [[TMP8:%.*]] = shl i32 [[TMP7]], 
1 -; LIMIT-NEXT: [[TMP9:%.*]] = add i32 [[LSR_IV]], [[TMP8]] -; LIMIT-NEXT: [[TMP10:%.*]] = sub i32 [[LSR_IV3]], [[I19]] -; LIMIT-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[FACTOR_PROL]] -; LIMIT-NEXT: [[TMP12:%.*]] = mul i32 [[C]], [[TMP11]] -; LIMIT-NEXT: [[TMP13:%.*]] = add i32 [[LSR_IV1]], [[I19]] -; LIMIT-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[FACTOR_PROL]] -; LIMIT-NEXT: [[TMP15:%.*]] = shl i32 [[TMP14]], 1 -; LIMIT-NEXT: [[TMP16:%.*]] = add i32 [[TMP12]], [[TMP15]] -; LIMIT-NEXT: [[TMP17:%.*]] = add i32 [[LSR_IV]], [[TMP16]] -; LIMIT-NEXT: [[I29:%.*]] = mul i32 [[TMP9]], [[C]] -; LIMIT-NEXT: [[FACTOR_2_PROL:%.*]] = shl i32 [[TMP17]], 1 -; LIMIT-NEXT: [[I30]] = add i32 [[I17]], [[FACTOR_2_PROL]] -; LIMIT-NEXT: [[I33]] = add i32 [[PHI6]], -3 +; LIMIT-NEXT: [[TMP13:%.*]] = mul i32 [[TMP1]], [[I18]] +; LIMIT-NEXT: [[TMP14:%.*]] = add i32 [[LSR_IV1]], [[TMP13]] +; LIMIT-NEXT: [[TMP15:%.*]] = shl i32 [[PHI5]], 2 +; LIMIT-NEXT: [[TMP16:%.*]] = add i32 [[TMP14]], [[TMP15]] +; LIMIT-NEXT: [[TMP17:%.*]] = add i32 [[TMP8]], [[LSR_IV]] +; LIMIT-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], [[TMP16]] +; LIMIT-NEXT: [[TMP19:%.*]] = add i32 [[LSR_IV3]], 28 +; LIMIT-NEXT: [[TMP20:%.*]] = mul i32 [[PHI5]], -2 +; LIMIT-NEXT: [[TMP21:%.*]] = shl i32 [[TMP20]], 1 +; LIMIT-NEXT: [[TMP22:%.*]] = add i32 [[TMP19]], [[TMP21]] +; LIMIT-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], [[I19]] +; LIMIT-NEXT: [[TMP24:%.*]] = mul i32 [[C]], [[TMP23]] +; LIMIT-NEXT: [[TMP25:%.*]] = add i32 [[LSR_IV1]], [[TMP24]] +; LIMIT-NEXT: [[TMP26:%.*]] = add i32 [[TMP8]], [[LSR_IV]] +; LIMIT-NEXT: [[TMP27:%.*]] = add i32 [[TMP26]], [[TMP15]] +; LIMIT-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], [[TMP25]] +; LIMIT-NEXT: [[I29:%.*]] = mul i32 [[TMP18]], [[C]] +; LIMIT-NEXT: [[FACTOR_2_PROL:%.*]] = shl i32 [[TMP28]], 1 +; LIMIT-NEXT: [[I30]] = add i32 [[TMP12]], [[FACTOR_2_PROL]] ; LIMIT-NEXT: [[ITER_SUB]] = add i32 [[ITER]], -1 ; LIMIT-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 3 -; LIMIT-NEXT: [[LSR_IV_NEXT2]] = 
add i32 [[LSR_IV1]], 3 +; LIMIT-NEXT: [[LSR_IV_NEXT2]] = add i32 [[LSR_IV1]], 6 ; LIMIT-NEXT: [[LSR_IV_NEXT4]] = add i32 [[LSR_IV3]], -3 ; LIMIT-NEXT: [[ITER_CMP:%.*]] = icmp eq i32 [[ITER_SUB]], 0 ; LIMIT-NEXT: br i1 [[ITER_CMP]], label [[OUTER_TAIL_LOOPEXIT:%.*]], label [[INNER_LOOP]] ; LIMIT: outer_tail.loopexit: +; LIMIT-NEXT: [[TMP29:%.*]] = sub i32 [[PHI2]], [[LSR_IV_NEXT]] ; LIMIT-NEXT: br label [[OUTER_TAIL]] ; LIMIT: outer_tail: -; LIMIT-NEXT: [[PHI7:%.*]] = phi i32 [ [[PHI2]], [[GUARD]] ], [ [[I33]], [[OUTER_TAIL_LOOPEXIT]] ] +; LIMIT-NEXT: [[PHI7:%.*]] = phi i32 [ [[PHI2]], [[GUARD]] ], [ [[TMP29]], [[OUTER_TAIL_LOOPEXIT]] ] ; LIMIT-NEXT: [[I35]] = sub i32 [[A]], [[PHI7]] ; LIMIT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[I35]], 9876 ; LIMIT-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[OUTER_LOOP]] diff --git a/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll b/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll --- a/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll @@ -16,113 +16,112 @@ ; CHECK-NEXT: [[VAL4:%.*]] = sub i32 [[VAL]], [[VAL3]] ; CHECK-NEXT: [[VAL5:%.*]] = ashr i32 undef, undef ; CHECK-NEXT: [[VAL6:%.*]] = sub i32 [[VAL4]], [[VAL5]] -; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[VAL]], 7 -; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[VAL3]], 7 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[VAL5]], 7 -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[VAL6]], 3 ; CHECK-NEXT: br label [[BB7:%.*]] ; CHECK: bb7: -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i32 [ [[LSR_IV_NEXT2:%.*]], [[BB32:%.*]] ], [ 0, [[BB:%.*]] ] -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB32]] ], [ -8, [[BB]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[BB32:%.*]] ], [ -8, [[BB:%.*]] ] +; CHECK-NEXT: [[VAL9:%.*]] = phi i32 [ 0, [[BB]] ], [ [[VAL35:%.*]], [[BB32]] 
] ; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 8 -; CHECK-NEXT: [[LSR_IV_NEXT2]] = add nuw nsw i32 [[LSR_IV1]], [[TMP5]] ; CHECK-NEXT: [[VAL10:%.*]] = icmp ult i64 [[LSR_IV_NEXT]], 65536 ; CHECK-NEXT: br i1 [[VAL10]], label [[BB12:%.*]], label [[BB11:%.*]] ; CHECK: bb11: ; CHECK-NEXT: unreachable ; CHECK: bb12: +; CHECK-NEXT: [[VAL13:%.*]] = add i32 [[VAL9]], [[VAL6]] ; CHECK-NEXT: [[VAL14:%.*]] = icmp slt i32 undef, undef -; CHECK-NEXT: br i1 [[VAL14]], label [[BB17:%.*]], label [[BB12_BB15SPLITSPLITSPLITSPLITSPLIT_CRIT_EDGE:%.*]] +; CHECK-NEXT: br i1 [[VAL14]], label [[BB17:%.*]], label [[BB15SPLITSPLITSPLITSPLITSPLITSPLIT:%.*]] ; CHECK: bb15splitsplitsplitsplitsplitsplit: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[VAL6]], [[VAL9]] ; CHECK-NEXT: br label [[BB15SPLITSPLITSPLITSPLITSPLIT:%.*]] -; CHECK: bb12.bb15splitsplitsplitsplitsplit_crit_edge: -; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[VAL6]], [[LSR_IV1]] +; CHECK: bb17.bb15splitsplitsplitsplitsplit_crit_edge: +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[VAL]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[VAL5]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[VAL9]] ; CHECK-NEXT: br label [[BB15SPLITSPLITSPLITSPLITSPLIT]] ; CHECK: bb15splitsplitsplitsplitsplit: -; CHECK-NEXT: [[VAL16_PH_PH_PH_PH_PH:%.*]] = phi i32 [ [[TMP6]], [[BB12_BB15SPLITSPLITSPLITSPLITSPLIT_CRIT_EDGE]] ], [ [[VAL35:%.*]], [[BB15SPLITSPLITSPLITSPLITSPLITSPLIT:%.*]] ] +; CHECK-NEXT: [[VAL16_PH_PH_PH_PH_PH:%.*]] = phi i32 [ [[TMP7]], [[BB17_BB15SPLITSPLITSPLITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[BB15SPLITSPLITSPLITSPLITSPLITSPLIT]] ] ; CHECK-NEXT: br label [[BB15SPLITSPLITSPLITSPLIT:%.*]] -; CHECK: bb17.bb15splitsplitsplitsplit_crit_edge: -; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[VAL]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = 
mul i32 [[VAL1]], [[VAL2]] -; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = shl i32 [[VAL5]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = sub i32 [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[LSR_IV1]] +; CHECK: bb20.bb15splitsplitsplitsplit_crit_edge: +; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[VAL]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = mul i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 3 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP8]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[VAL5]], 3 +; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[VAL9]] ; CHECK-NEXT: br label [[BB15SPLITSPLITSPLITSPLIT]] ; CHECK: bb15splitsplitsplitsplit: -; CHECK-NEXT: [[VAL16_PH_PH_PH_PH:%.*]] = phi i32 [ [[TMP13]], [[BB17_BB15SPLITSPLITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH_PH_PH]], [[BB15SPLITSPLITSPLITSPLITSPLIT]] ] +; CHECK-NEXT: [[VAL16_PH_PH_PH_PH:%.*]] = phi i32 [ [[TMP14]], [[BB20_BB15SPLITSPLITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH_PH_PH]], [[BB15SPLITSPLITSPLITSPLITSPLIT]] ] ; CHECK-NEXT: br label [[BB15SPLITSPLITSPLIT:%.*]] -; CHECK: bb20.bb15splitsplitsplit_crit_edge: -; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[VAL]], 3 -; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[VAL1]], [[VAL2]] -; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 3 -; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP14]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[VAL5]], 3 -; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[LSR_IV1]] +; CHECK: bb23.bb15splitsplitsplit_crit_edge: +; CHECK-NEXT: [[TMP15:%.*]] = shl i32 [[VAL]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: [[TMP17:%.*]] = shl i32 [[TMP16]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = sub i32 [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = shl i32 [[VAL5]], 2 +; 
CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP20]], [[VAL9]] ; CHECK-NEXT: br label [[BB15SPLITSPLITSPLIT]] ; CHECK: bb15splitsplitsplit: -; CHECK-NEXT: [[VAL16_PH_PH_PH:%.*]] = phi i32 [ [[TMP20]], [[BB20_BB15SPLITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH_PH]], [[BB15SPLITSPLITSPLITSPLIT]] ] +; CHECK-NEXT: [[VAL16_PH_PH_PH:%.*]] = phi i32 [ [[TMP21]], [[BB23_BB15SPLITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH_PH]], [[BB15SPLITSPLITSPLITSPLIT]] ] ; CHECK-NEXT: br label [[BB15SPLITSPLIT:%.*]] -; CHECK: bb23.bb15splitsplit_crit_edge: -; CHECK-NEXT: [[TMP21:%.*]] = shl i32 [[VAL]], 2 -; CHECK-NEXT: [[TMP22:%.*]] = mul i32 [[VAL1]], [[VAL2]] -; CHECK-NEXT: [[TMP23:%.*]] = shl i32 [[TMP22]], 2 -; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP21]], [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = shl i32 [[VAL5]], 2 -; CHECK-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = add i32 [[TMP26]], [[LSR_IV1]] +; CHECK: bb26.bb15splitsplit_crit_edge: +; CHECK-NEXT: [[TMP22:%.*]] = mul i32 [[VAL]], 5 +; CHECK-NEXT: [[TMP23:%.*]] = mul i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: [[TMP24:%.*]] = mul i32 [[TMP23]], 5 +; CHECK-NEXT: [[TMP25:%.*]] = sub i32 [[TMP22]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul i32 [[VAL5]], 5 +; CHECK-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], [[VAL9]] ; CHECK-NEXT: br label [[BB15SPLITSPLIT]] ; CHECK: bb15splitsplit: -; CHECK-NEXT: [[VAL16_PH_PH:%.*]] = phi i32 [ [[TMP27]], [[BB23_BB15SPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH]], [[BB15SPLITSPLITSPLIT]] ] +; CHECK-NEXT: [[VAL16_PH_PH:%.*]] = phi i32 [ [[TMP28]], [[BB26_BB15SPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH_PH]], [[BB15SPLITSPLITSPLIT]] ] ; CHECK-NEXT: br label [[BB15SPLIT:%.*]] -; CHECK: bb26.bb15split_crit_edge: -; CHECK-NEXT: [[TMP28:%.*]] = mul i32 [[VAL]], 5 -; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[VAL1]], [[VAL2]] -; CHECK-NEXT: 
[[TMP30:%.*]] = mul i32 [[TMP29]], 5 -; CHECK-NEXT: [[TMP31:%.*]] = sub i32 [[TMP28]], [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = mul i32 [[VAL5]], 5 -; CHECK-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP32]] -; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], [[LSR_IV1]] +; CHECK: bb29.bb15split_crit_edge: +; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[VAL]], 6 +; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 6 +; CHECK-NEXT: [[TMP32:%.*]] = sub i32 [[TMP29]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = mul i32 [[VAL5]], 6 +; CHECK-NEXT: [[TMP34:%.*]] = sub i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = add i32 [[TMP34]], [[VAL9]] ; CHECK-NEXT: br label [[BB15SPLIT]] ; CHECK: bb15split: -; CHECK-NEXT: [[VAL16_PH:%.*]] = phi i32 [ [[TMP34]], [[BB26_BB15SPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH]], [[BB15SPLITSPLIT]] ] +; CHECK-NEXT: [[VAL16_PH:%.*]] = phi i32 [ [[TMP35]], [[BB29_BB15SPLIT_CRIT_EDGE:%.*]] ], [ [[VAL16_PH_PH]], [[BB15SPLITSPLIT]] ] ; CHECK-NEXT: br label [[BB15:%.*]] -; CHECK: bb29.bb15_crit_edge: -; CHECK-NEXT: [[TMP35:%.*]] = mul i32 [[VAL]], 6 -; CHECK-NEXT: [[TMP36:%.*]] = mul i32 [[VAL1]], [[VAL2]] -; CHECK-NEXT: [[TMP37:%.*]] = mul i32 [[TMP36]], 6 -; CHECK-NEXT: [[TMP38:%.*]] = sub i32 [[TMP35]], [[TMP37]] -; CHECK-NEXT: [[TMP39:%.*]] = mul i32 [[VAL5]], 6 -; CHECK-NEXT: [[TMP40:%.*]] = sub i32 [[TMP38]], [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = add i32 [[TMP40]], [[LSR_IV1]] +; CHECK: bb32.bb15_crit_edge: ; CHECK-NEXT: br label [[BB15]] ; CHECK: bb15: -; CHECK-NEXT: [[VAL16:%.*]] = phi i32 [ [[TMP41]], [[BB29_BB15_CRIT_EDGE:%.*]] ], [ [[VAL16_PH]], [[BB15SPLIT]] ] +; CHECK-NEXT: [[VAL16:%.*]] = phi i32 [ [[VAL35]], [[BB32_BB15_CRIT_EDGE:%.*]] ], [ [[VAL16_PH]], [[BB15SPLIT]] ] ; CHECK-NEXT: call void @widget() [ "deopt"(i32 [[VAL16]], i32 3, i32 [[VAL]]) ] ; CHECK-NEXT: unreachable ; CHECK: bb17: +; CHECK-NEXT: [[VAL18:%.*]] = add i32 [[VAL13]], [[VAL6]] ; CHECK-NEXT: [[VAL19:%.*]] = icmp 
slt i32 undef, undef -; CHECK-NEXT: br i1 [[VAL19]], label [[BB20:%.*]], label [[BB17_BB15SPLITSPLITSPLITSPLIT_CRIT_EDGE]] +; CHECK-NEXT: br i1 [[VAL19]], label [[BB20:%.*]], label [[BB17_BB15SPLITSPLITSPLITSPLITSPLIT_CRIT_EDGE]] ; CHECK: bb20: +; CHECK-NEXT: [[VAL21:%.*]] = add i32 [[VAL18]], [[VAL6]] ; CHECK-NEXT: [[VAL22:%.*]] = icmp slt i32 undef, undef -; CHECK-NEXT: br i1 [[VAL22]], label [[BB23:%.*]], label [[BB20_BB15SPLITSPLITSPLIT_CRIT_EDGE]] +; CHECK-NEXT: br i1 [[VAL22]], label [[BB23:%.*]], label [[BB20_BB15SPLITSPLITSPLITSPLIT_CRIT_EDGE]] ; CHECK: bb23: +; CHECK-NEXT: [[VAL24:%.*]] = add i32 [[VAL21]], [[VAL6]] ; CHECK-NEXT: [[VAL25:%.*]] = icmp slt i32 undef, undef -; CHECK-NEXT: br i1 [[VAL25]], label [[BB26:%.*]], label [[BB23_BB15SPLITSPLIT_CRIT_EDGE]] +; CHECK-NEXT: br i1 [[VAL25]], label [[BB26:%.*]], label [[BB23_BB15SPLITSPLITSPLIT_CRIT_EDGE]] ; CHECK: bb26: +; CHECK-NEXT: [[VAL27:%.*]] = add i32 [[VAL24]], [[VAL6]] ; CHECK-NEXT: [[VAL28:%.*]] = icmp slt i32 undef, undef -; CHECK-NEXT: br i1 [[VAL28]], label [[BB29:%.*]], label [[BB26_BB15SPLIT_CRIT_EDGE]] +; CHECK-NEXT: br i1 [[VAL28]], label [[BB29:%.*]], label [[BB26_BB15SPLITSPLIT_CRIT_EDGE]] ; CHECK: bb29: +; CHECK-NEXT: [[VAL30:%.*]] = add i32 [[VAL27]], [[VAL6]] ; CHECK-NEXT: [[VAL31:%.*]] = icmp slt i32 undef, undef -; CHECK-NEXT: br i1 [[VAL31]], label [[BB32]], label [[BB29_BB15_CRIT_EDGE]] +; CHECK-NEXT: br i1 [[VAL31]], label [[BB32]], label [[BB29_BB15SPLIT_CRIT_EDGE]] ; CHECK: bb32: -; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP4]], [[LSR_IV1]] -; CHECK-NEXT: [[VAL35]] = add i32 [[TMP42]], [[VAL6]] -; CHECK-NEXT: br i1 false, label [[BB7]], label [[BB15SPLITSPLITSPLITSPLITSPLITSPLIT]] +; CHECK-NEXT: [[VAL33:%.*]] = add i32 [[VAL30]], [[VAL6]] +; CHECK-NEXT: [[VAL35]] = add i32 [[VAL33]], [[VAL6]] +; CHECK-NEXT: br i1 false, label [[BB7]], label [[BB32_BB15_CRIT_EDGE]] ; bb: %val = load i32, i32 addrspace(3)* undef, align 4