Index: llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h =================================================================== --- llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -39,6 +39,19 @@ bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, ScalarEvolution &SE); +/// struct for holding enough information to help calculate the cost of the +/// given SCEV when expanded into IR. +struct SCEVOperand { + explicit SCEVOperand(unsigned Opc, int Idx, const SCEV *S) : + ParentOpcode(Opc), OperandIdx(Idx), S(S) { } + /// Opcode of the instruction that uses the operand. + unsigned ParentOpcode; + /// The use index of an expanded instruction. + int OperandIdx; + /// The SCEV operand to be costed. + const SCEV* S = nullptr; +}; + /// This class uses information about analyze scalars to rewrite expressions /// in canonical form. /// @@ -193,14 +206,14 @@ assert(At && "This function requires At instruction to be provided."); if (!TTI) // In assert-less builds, avoid crashing return true; // by always claiming to be high-cost. - SmallVector Worklist; + SmallVector Worklist; SmallPtrSet Processed; int BudgetRemaining = Budget * TargetTransformInfo::TCC_Basic; - Worklist.emplace_back(Expr); + Worklist.emplace_back(0, 0, Expr); while (!Worklist.empty()) { - const SCEV *S = Worklist.pop_back_val(); - if (isHighCostExpansionHelper(S, L, *At, BudgetRemaining, *TTI, Processed, - Worklist)) + SCEVOperand WorkItem = Worklist.pop_back_val(); + if (isHighCostExpansionHelper(WorkItem, L, *At, BudgetRemaining, + *TTI, Processed, Worklist)) return true; } assert(BudgetRemaining >= 0 && "Should have returned from inner loop."); @@ -366,11 +379,11 @@ Value *expandCodeForImpl(const SCEV *SH, Type *Ty, Instruction *I, bool Root); /// Recursive helper function for isHighCostExpansion. - bool isHighCostExpansionHelper(const SCEV *S, Loop *L, const Instruction &At, - int &BudgetRemaining, - const TargetTransformInfo &TTI, - SmallPtrSetImpl &Processed, - SmallVectorImpl &Worklist); + bool isHighCostExpansionHelper( + SCEVOperand &WorkItem, Loop *L, const Instruction &At, int &BudgetRemaining, + const TargetTransformInfo &TTI, + SmallPtrSetImpl &Processed, + SmallVectorImpl &Worklist); /// Insert the specified binary operator, doing a small amount of work to /// avoid inserting an obviously redundant operation, and hoisting to an Index: llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp =================================================================== --- llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -2172,13 +2172,128 @@ return None; } +template static int costAndCollectOperands( + SCEVOperand &WorkItem, const TargetTransformInfo &TTI, + TargetTransformInfo::TargetCostKind CostKind, + SmallVectorImpl &Worklist) { + + const T *S = cast(WorkItem.S); + int Cost = 0; + SmallVector Opcodes; + + // In this polynominal, we may have some zero operands, and we shouldn't + // really charge for those. So how many non-zero coeffients are there? + int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) { + return !Op->isZero(); + }); + + // Ignoring constant term (operand 0), how many of the coeffients are u> 1? + int NumNonZeroDegreeNonOneTerms = + llvm::count_if(S->operands(), [](const SCEV *Op) { + auto *SConst = dyn_cast(Op); + return !SConst || SConst->getAPInt().ugt(1); + }); + + auto CastCost = [&](unsigned Opcode) { + Opcodes.push_back(Opcode); + return TTI.getCastInstrCost(Opcode, S->getType(), + S->getOperand(0)->getType(), + TTI::CastContextHint::None, CostKind); + }; + + auto ArithCost = [&](unsigned Opcode, unsigned NumRequired) { + Opcodes.push_back(Opcode); + return NumRequired * + TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind); + }; + + switch (S->getSCEVType()) { + default: + return 0; + case scTruncate: + Cost = CastCost(Instruction::Trunc); + break; + case scZeroExtend: + Cost = CastCost(Instruction::ZExt); + break; + case scSignExtend: + Cost = CastCost(Instruction::SExt); + break; + case scUDivExpr: { + unsigned Opcode = Instruction::UDiv; + if (auto *SC = dyn_cast(S->getOperand(1))) + if (SC->getAPInt().isPowerOf2()) + Opcode = Instruction::LShr; + Cost = ArithCost(Opcode, 1); + break; + } + case scAddExpr: + Cost = ArithCost(Instruction::Add, NumTerms - 1); + break; + case scMulExpr: + // TODO: this is a very pessimistic cost modelling for Mul, + // because of Bin Pow algorithm actually used by the expander, + // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). + Cost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); + break; + case scSMaxExpr: + case scUMaxExpr: + case scSMinExpr: + case scUMinExpr: { + Type *OpType = S->getOperand(0)->getType(); + Opcodes.push_back(Instruction::ICmp); + Opcodes.push_back(Instruction::Select); + Cost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType, + CmpInst::makeCmpResultType(OpType), + CostKind) + + TTI.getCmpSelInstrCost(Instruction::Select, OpType, + CmpInst::makeCmpResultType(OpType), + CostKind); + Cost *= (S->getNumOperands() - 1); + break; + } + case scAddRecExpr: { + assert(NumTerms >= 1 && "Polynominal should have at least one term."); + assert(!(*std::prev(S->operands().end()))->isZero() && + "Last operand should not be zero"); + + // Much like with normal add expr, the polynominal will require + // one less addition than the number of it's terms. + int AddCost = ArithCost(Instruction::Add, NumTerms - 1); + // Here, *each* one of those will require a multiplication. + int MulCost = ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms); + Cost = AddCost + MulCost; + + // What is the degree of this polynominal? + int PolyDegree = S->getNumOperands() - 1; + assert(PolyDegree >= 1 && "Should be at least affine."); + + // The final term will be: + // Op_{PolyDegree} * x ^ {PolyDegree} + // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. + // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for + // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. + // FIXME: this is conservatively correct, but might be overly pessimistic. + Cost += MulCost * (PolyDegree - 1); + } + } + + for (unsigned Opc : Opcodes) { + for (unsigned OpIdx = 0; OpIdx < S->getNumOperands(); ++OpIdx) { + Worklist.emplace_back(Opc, OpIdx, S->getOperand(OpIdx)); + } + } + return Cost; +} + bool SCEVExpander::isHighCostExpansionHelper( - const SCEV *S, Loop *L, const Instruction &At, int &BudgetRemaining, + SCEVOperand &WorkItem, Loop *L, const Instruction &At, int &BudgetRemaining, const TargetTransformInfo &TTI, SmallPtrSetImpl &Processed, - SmallVectorImpl &Worklist) { + SmallVectorImpl &Worklist) { if (BudgetRemaining < 0) return true; // Already run out of budget, give up. + const SCEV *S = WorkItem.S; // Was the cost of expansion of this expression already accounted for? if (!Processed.insert(S).second) return false; // We have already accounted for this expression. @@ -2197,43 +2312,14 @@ TargetTransformInfo::TargetCostKind CostKind = TargetTransformInfo::TCK_RecipThroughput; - if (auto *CastExpr = dyn_cast(S)) { - unsigned Opcode; - switch (S->getSCEVType()) { - case scTruncate: - Opcode = Instruction::Trunc; - break; - case scZeroExtend: - Opcode = Instruction::ZExt; - break; - case scSignExtend: - Opcode = Instruction::SExt; - break; - default: - llvm_unreachable("There are no other cast types."); - } - const SCEV *Op = CastExpr->getOperand(); - BudgetRemaining -= TTI.getCastInstrCost( - Opcode, /*Dst=*/S->getType(), - /*Src=*/Op->getType(), TTI::CastContextHint::None, CostKind); - Worklist.emplace_back(Op); + if (isa(S)) { + int Cost = + costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); + BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. - } - - if (auto *UDivExpr = dyn_cast(S)) { - // If the divisor is a power of two count this as a logical right-shift. - if (auto *SC = dyn_cast(UDivExpr->getRHS())) { - if (SC->getAPInt().isPowerOf2()) { - BudgetRemaining -= - TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(), - CostKind); - // Note that we don't count the cost of RHS, because it is a constant, - // and we consider those to be free. But if that changes, we would need - // to log2() it first before calling isHighCostExpansionHelper(). - Worklist.emplace_back(UDivExpr->getLHS()); - return false; // Will answer upon next entry into this function. - } - } + } else if (isa(S)) { + int Cost = + costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or // HowManyLessThans produced to compute a precise expression, rather than a @@ -2248,116 +2334,25 @@ return false; // Consider it to be free. // Need to count the cost of this UDiv. - BudgetRemaining -= - TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(), - CostKind); - Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()}); + BudgetRemaining -= Cost; return false; // Will answer upon next entry into this function. - } - - if (const auto *NAry = dyn_cast(S)) { - Type *OpType = NAry->getType(); - - assert(NAry->getNumOperands() >= 2 && - "Polynomial should be at least linear"); - - int AddCost = - TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind); - int MulCost = - TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind); - - // In this polynominal, we may have some zero operands, and we shouldn't - // really charge for those. So how many non-zero coeffients are there? - int NumTerms = llvm::count_if(NAry->operands(), - [](const SCEV *S) { return !S->isZero(); }); - assert(NumTerms >= 1 && "Polynominal should have at least one term."); - assert(!(*std::prev(NAry->operands().end()))->isZero() && - "Last operand should not be zero"); - - // Much like with normal add expr, the polynominal will require - // one less addition than the number of it's terms. - BudgetRemaining -= AddCost * (NumTerms - 1); - if (BudgetRemaining < 0) - return true; - - // Ignoring constant term (operand 0), how many of the coeffients are u> 1? - int NumNonZeroDegreeNonOneTerms = - llvm::count_if(make_range(std::next(NAry->op_begin()), NAry->op_end()), - [](const SCEV *S) { - auto *SConst = dyn_cast(S); - return !SConst || SConst->getAPInt().ugt(1); - }); - // Here, *each* one of those will require a multiplication. - BudgetRemaining -= MulCost * NumNonZeroDegreeNonOneTerms; - if (BudgetRemaining < 0) - return true; - - // What is the degree of this polynominal? - int PolyDegree = NAry->getNumOperands() - 1; - assert(PolyDegree >= 1 && "Should be at least affine."); - - // The final term will be: - // Op_{PolyDegree} * x ^ {PolyDegree} - // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations. - // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for - // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free. - // FIXME: this is conservatively correct, but might be overly pessimistic. - BudgetRemaining -= MulCost * (PolyDegree - 1); - if (BudgetRemaining < 0) - return true; - - // And finally, the operands themselves should fit within the budget. - Worklist.insert(Worklist.end(), NAry->operands().begin(), - NAry->operands().end()); - return false; // So far so good, though ops may be too costly? - } - - if (const SCEVNAryExpr *NAry = dyn_cast(S)) { - Type *OpType = NAry->getType(); - - int PairCost; - switch (S->getSCEVType()) { - case scAddExpr: - PairCost = - TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind); - break; - case scMulExpr: - // TODO: this is a very pessimistic cost modelling for Mul, - // because of Bin Pow algorithm actually used by the expander, - // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN(). - PairCost = - TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind); - break; - case scSMaxExpr: - case scUMaxExpr: - case scSMinExpr: - case scUMinExpr: - PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType, - CmpInst::makeCmpResultType(OpType), - CostKind) + - TTI.getCmpSelInstrCost(Instruction::Select, OpType, - CmpInst::makeCmpResultType(OpType), - CostKind); - break; - default: - llvm_unreachable("There are no other variants here."); - } - + } else if (const SCEVNAryExpr *NAry = dyn_cast(S)) { assert(NAry->getNumOperands() > 1 && "Nary expr should have more than 1 operand."); // The simple nary expr will require one less op (or pair of ops) // than the number of it's terms. - BudgetRemaining -= PairCost * (NAry->getNumOperands() - 1); - if (BudgetRemaining < 0) - return true; - - // And finally, the operands themselves should fit within the budget. - Worklist.insert(Worklist.end(), NAry->operands().begin(), - NAry->operands().end()); - return false; // So far so good, though ops may be too costly? - } - - llvm_unreachable("No other scev expressions possible."); + int Cost = + costAndCollectOperands(WorkItem, TTI, CostKind, Worklist); + BudgetRemaining -= Cost; + return BudgetRemaining < 0; + } else if (const auto *NAry = dyn_cast(S)) { + assert(NAry->getNumOperands() >= 2 && + "Polynomial should be at least linear"); + BudgetRemaining -= costAndCollectOperands( + WorkItem, TTI, CostKind, Worklist); + return BudgetRemaining < 0; + } else + llvm_unreachable("No other scev expressions possible."); } Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, Index: llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll +++ llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll @@ -193,7 +193,7 @@ define void @test7(i64 %start, i64* %inc_ptr) { ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !0 +; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0:!range !.*]] ; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0 ; CHECK-NEXT: br i1 [[OK]], label [[LOOP_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: loop.preheader: @@ -311,15 +311,12 @@ define void @test3_neg(i64 %start) { ; CHECK-LABEL: @test3_neg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[START:%.*]], -1 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[START]], i64 -1 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -339,19 +336,16 @@ define void @test4_neg(i64 %start) { ; CHECK-LABEL: @test4_neg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[START:%.*]], 0 -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[START]], i64 0 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw i64 [[SMAX]], 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 25 ; CHECK-NEXT: br i1 [[CMP]], label [[BACKEDGE]], label [[FOR_END:%.*]] ; CHECK: backedge: ; CHECK-NEXT: call void @foo() -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP1]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[LOOP]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i64 [[INDVARS_IV]], -1 +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_END]], label [[LOOP]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -405,7 +399,7 @@ define void @test8(i64 %start, i64* %inc_ptr) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !1 +; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG1:!range !.*]] ; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0 ; CHECK-NEXT: br i1 [[OK]], label [[LOOP_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: loop.preheader: @@ -525,7 +519,7 @@ define void @test11(i64* %inc_ptr) { ; CHECK-LABEL: @test11( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !0 +; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0]] ; CHECK-NEXT: [[NE_COND:%.*]] = icmp ne i64 [[INC]], 0 ; CHECK-NEXT: br i1 [[NE_COND]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: @@ -576,7 +570,7 @@ define void @test12(i64* %inc_ptr) { ; CHECK-LABEL: @test12( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], !range !0 +; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] Index: llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -457,15 +457,13 @@ ; CHECK-NEXT: br i1 [[E]], label [[FOR_COND_PREHEADER:%.*]], label [[LEAVE:%.*]] ; CHECK: for.cond.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[INIT]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[INIT]], [[B:%.*]] -; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[INIT]], i32 [[B]] -; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[B:%.*]] to i64 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_COND_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ] ; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 Index: llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll +++ llvm/test/Transforms/LoopUnroll/runtime-small-upperbound.ll @@ -100,63 +100,15 @@ ; UPPER-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 17 ; UPPER-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; UPPER: loop.preheader: -; UPPER-NEXT: [[TMP1:%.*]] = sub i32 16, [[X]] -; UPPER-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 2 -; UPPER-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 -; UPPER-NEXT: [[TMP4:%.*]] = urem i32 [[TMP2]], 6 -; UPPER-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1 -; UPPER-NEXT: [[XTRAITER:%.*]] = urem i32 [[TMP5]], 6 -; UPPER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 -; UPPER-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_PROL_PREHEADER:%.*]], label [[LOOP_PROL_LOOPEXIT:%.*]] -; UPPER: loop.prol.preheader: -; UPPER-NEXT: br label [[LOOP_PROL:%.*]] -; UPPER: loop.prol: -; UPPER-NEXT: [[IV_PROL:%.*]] = phi i32 [ [[IV_NEXT_PROL:%.*]], [[LOOP_PROL]] ], [ [[X]], [[LOOP_PROL_PREHEADER]] ] -; UPPER-NEXT: [[PTR_PROL:%.*]] = phi i8* [ [[PTR_NEXT_PROL:%.*]], [[LOOP_PROL]] ], [ [[Y]], [[LOOP_PROL_PREHEADER]] ] -; UPPER-NEXT: [[PROL_ITER:%.*]] = phi i32 [ [[XTRAITER]], [[LOOP_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LOOP_PROL]] ] -; UPPER-NEXT: [[IV_NEXT_PROL]] = add nuw i32 [[IV_PROL]], 4 -; UPPER-NEXT: [[PTR_NEXT_PROL]] = getelementptr inbounds i8, i8* [[PTR_PROL]], i32 1 -; UPPER-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT_PROL]], align 1 -; UPPER-NEXT: [[TMP6:%.*]] = icmp ult i32 [[IV_NEXT_PROL]], 17 -; UPPER-NEXT: [[PROL_ITER_SUB]] = sub i32 [[PROL_ITER]], 1 -; UPPER-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0 -; UPPER-NEXT: br i1 [[PROL_ITER_CMP]], label [[LOOP_PROL]], label [[LOOP_PROL_LOOPEXIT_UNR_LCSSA:%.*]], [[LOOP0:!llvm.loop !.*]] -; UPPER: loop.prol.loopexit.unr-lcssa: -; UPPER-NEXT: [[IV_UNR_PH:%.*]] = phi i32 [ [[IV_NEXT_PROL]], [[LOOP_PROL]] ] -; UPPER-NEXT: [[PTR_UNR_PH:%.*]] = phi i8* [ [[PTR_NEXT_PROL]], [[LOOP_PROL]] ] -; UPPER-NEXT: br label [[LOOP_PROL_LOOPEXIT]] -; UPPER: loop.prol.loopexit: -; UPPER-NEXT: [[IV_UNR:%.*]] = phi i32 [ [[X]], [[LOOP_PREHEADER]] ], [ [[IV_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ] -; UPPER-NEXT: [[PTR_UNR:%.*]] = phi i8* [ [[Y]], [[LOOP_PREHEADER]] ], [ [[PTR_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ] -; UPPER-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP2]], 5 -; UPPER-NEXT: br i1 [[TMP7]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_PREHEADER_NEW:%.*]] -; UPPER: loop.preheader.new: ; UPPER-NEXT: br label [[LOOP:%.*]] ; UPPER: loop: -; UPPER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_UNR]], [[LOOP_PREHEADER_NEW]] ], [ [[IV_NEXT_5:%.*]], [[LOOP]] ] -; UPPER-NEXT: [[PTR:%.*]] = phi i8* [ [[PTR_UNR]], [[LOOP_PREHEADER_NEW]] ], [ [[PTR_NEXT_5:%.*]], [[LOOP]] ] -; UPPER-NEXT: [[IV_NEXT:%.*]] = add nuw i32 [[IV]], 4 -; UPPER-NEXT: [[PTR_NEXT:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT]], align 1 -; UPPER-NEXT: [[IV_NEXT_1:%.*]] = add nuw i32 [[IV_NEXT]], 4 -; UPPER-NEXT: [[PTR_NEXT_1:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_1]], align 1 -; UPPER-NEXT: [[IV_NEXT_2:%.*]] = add nuw i32 [[IV_NEXT_1]], 4 -; UPPER-NEXT: [[PTR_NEXT_2:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_1]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_2]], align 1 -; UPPER-NEXT: [[IV_NEXT_3:%.*]] = add nuw i32 [[IV_NEXT_2]], 4 -; UPPER-NEXT: [[PTR_NEXT_3:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_2]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_3]], align 1 -; UPPER-NEXT: [[IV_NEXT_4:%.*]] = add nuw i32 [[IV_NEXT_3]], 4 -; UPPER-NEXT: [[PTR_NEXT_4:%.*]] = getelementptr inbounds i8, i8* [[PTR_NEXT_3]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_4]], align 1 -; UPPER-NEXT: [[IV_NEXT_5]] = add nuw i32 [[IV_NEXT_4]], 4 -; UPPER-NEXT: [[PTR_NEXT_5]] = getelementptr inbounds i8, i8* [[PTR_NEXT_4]], i32 1 -; UPPER-NEXT: store i8 [[ARG]], i8* [[PTR_NEXT_5]], align 1 -; UPPER-NEXT: [[TMP8:%.*]] = icmp ult i32 [[IV_NEXT_5]], 17 -; UPPER-NEXT: br i1 [[TMP8]], label [[LOOP]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]] -; UPPER: exit.loopexit.unr-lcssa: -; UPPER-NEXT: br label [[EXIT_LOOPEXIT]] +; UPPER-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ] +; UPPER-NEXT: [[PTR:%.*]] = phi i8* [ [[PTR_NEXT:%.*]], [[LOOP]] ], [ [[Y]], [[LOOP_PREHEADER]] ] +; UPPER-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 4 +; UPPER-NEXT: [[PTR_NEXT]] = getelementptr inbounds i8, i8* [[PTR]], i32 1 +; UPPER-NEXT: store i8 [[ARG:%.*]], i8* [[PTR_NEXT]], align 1 +; UPPER-NEXT: [[TMP1:%.*]] = icmp ult i32 [[IV_NEXT]], 17 +; UPPER-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] ; UPPER: exit.loopexit: ; UPPER-NEXT: br label [[EXIT]] ; UPPER: exit: